diff --git a/common/type_system/Type.cpp b/common/type_system/Type.cpp index f40964e9b5..755f63ee01 100644 --- a/common/type_system/Type.cpp +++ b/common/type_system/Type.cpp @@ -165,6 +165,21 @@ bool Type::get_my_method(const std::string& name, MethodInfo* out) const { return false; } +/*! + * Get a method that is defined specifically in this type by id. Returns if it was found or not. + */ +bool Type::get_my_method(int id, MethodInfo* out) const { + assert(id > 0); // 0 is new, should use explicit new method functions instead. + for (auto& x : m_methods) { + if (x.id == id) { + *out = x; + return true; + } + } + + return false; +} + /*! * Get the last method defined specifically for this type. Returns if there were any methods * defined specifically for this type or not. diff --git a/common/type_system/Type.h b/common/type_system/Type.h index a5f32670c4..76af676556 100644 --- a/common/type_system/Type.h +++ b/common/type_system/Type.h @@ -69,6 +69,7 @@ class Type { std::string get_parent() const; void set_runtime_type(std::string name); bool get_my_method(const std::string& name, MethodInfo* out) const; + bool get_my_method(int id, MethodInfo* out) const; bool get_my_last_method(MethodInfo* out) const; bool get_my_new_method(MethodInfo* out) const; const MethodInfo& add_method(const MethodInfo& info); diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index 9876faf166..6d8a31991b 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -425,6 +425,40 @@ MethodInfo TypeSystem::lookup_method(const std::string& type_name, const std::st throw std::runtime_error("lookup_method failed"); } +/*! + * Lookup information on a method by ID number. Error if it can't be found. Will check parent types + * if the given type doesn't specialize the method. 
+ */ +MethodInfo TypeSystem::lookup_method(const std::string& type_name, int method_id) { + if (method_id == GOAL_NEW_METHOD) { + return lookup_new_method(type_name); + } + + MethodInfo info; + + // first lookup the type + auto* type = lookup_type(type_name); + + auto* iter_type = type; + // look up the method + while (true) { + if (iter_type->get_my_method(method_id, &info)) { + return info; + } + + if (iter_type->has_parent()) { + iter_type = lookup_type(iter_type->get_parent()); + } else { + // couldn't find method. + break; + } + } + + fmt::print("[TypeSystem] The method with id {} of type {} could not be found.\n", method_id, + type_name); + throw std::runtime_error("lookup_method failed"); +} + /*! * Lookup information on a new method and get the most specialized version. */ @@ -715,7 +749,14 @@ void TypeSystem::add_builtin_types() { add_field_to_type(kheap_type, "top-base", make_typespec("pointer")); // todo - (void)array_type; + builtin_structure_inherit(array_type); + add_method(array_type, "new", + make_function_typespec({"symbol", "type", "type", "int"}, "_type_")); + // array has: number, number, type + add_field_to_type(array_type, "length", make_typespec("int32")); + add_field_to_type(array_type, "allocated-length", make_typespec("int32")); + add_field_to_type(array_type, "content-type", make_typespec("type")); + add_field_to_type(array_type, "data", make_typespec("uint8"), false, true); // pair pair_type->override_offset(2); @@ -1064,6 +1105,10 @@ TypeSpec TypeSystem::lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b return result; } +TypeSpec TypeSystem::lowest_common_ancestor_reg(const TypeSpec& a, const TypeSpec& b) { + return coerce_to_reg_type(lowest_common_ancestor(a, b)); +} + /*! * Lowest common ancestor of multiple (or at least one) type. 
*/ @@ -1083,12 +1128,12 @@ TypeSpec TypeSystem::lowest_common_ancestor(const std::vector& types) TypeSpec coerce_to_reg_type(const TypeSpec& in) { if (in.arg_count() == 0) { if (in.base_type() == "int8" || in.base_type() == "int16" || in.base_type() == "int32" || - in.base_type() == "int64") { + in.base_type() == "int64" || in.base_type() == "integer") { return TypeSpec("int"); } if (in.base_type() == "uint8" || in.base_type() == "uint16" || in.base_type() == "uint32" || - in.base_type() == "uint64") { + in.base_type() == "uint64" || in.base_type() == "uinteger") { return TypeSpec("uint"); } } @@ -1133,6 +1178,9 @@ bool TypeSystem::reverse_deref(const ReverseDerefInputInfo& input, token.kind = ReverseDerefInfo::DerefToken::INDEX; token.index = closest_index; + if (!di.mem_deref) { + return false; + } assert(di.mem_deref); if (offset_into_elt == 0) { if (input.mem_deref) { diff --git a/common/type_system/TypeSystem.h b/common/type_system/TypeSystem.h index f0e028c119..d32c6de977 100644 --- a/common/type_system/TypeSystem.h +++ b/common/type_system/TypeSystem.h @@ -96,6 +96,7 @@ class TypeSystem { bool allow_new_method = true); MethodInfo add_new_method(Type* type, const TypeSpec& ts); MethodInfo lookup_method(const std::string& type_name, const std::string& method_name); + MethodInfo lookup_method(const std::string& type_name, int method_id); MethodInfo lookup_new_method(const std::string& type_name); void assert_method_id(const std::string& type_name, const std::string& method_name, int id); @@ -135,6 +136,7 @@ class TypeSystem { } TypeSpec lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b); + TypeSpec lowest_common_ancestor_reg(const TypeSpec& a, const TypeSpec& b); TypeSpec lowest_common_ancestor(const std::vector& types); private: diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index b030592f52..4573cba883 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -20,7 +20,10 @@ add_executable(decompiler 
data/tpage.cpp data/game_text.cpp data/StrFileReader.cpp - data/game_count.cpp data/LinkedWordReader.h) + data/game_count.cpp + Function/TypeAnalysis.cpp + IR/IR_TypeAnalysis.cpp + util/TP_Type.cpp) target_link_libraries(decompiler goos diff --git a/decompiler/Function/BasicBlocks.h b/decompiler/Function/BasicBlocks.h index d4248f1896..df2c7bf04e 100644 --- a/decompiler/Function/BasicBlocks.h +++ b/decompiler/Function/BasicBlocks.h @@ -5,6 +5,7 @@ #include "CfgVtx.h" #include "decompiler/util/DecompilerTypeSystem.h" +#include "decompiler/util/TP_Type.h" class LinkedObjectFile; class Function; @@ -26,6 +27,11 @@ struct BasicBlock { BasicBlock(int _start_word, int _end_word) : start_word(_start_word), end_word(_end_word) {} }; +struct BlockTopologicalSort { + std::vector vist_order; + std::unordered_set unreachable; +}; + std::vector find_blocks_in_function(const LinkedObjectFile& file, int seg, const Function& func); diff --git a/decompiler/Function/Function.cpp b/decompiler/Function/Function.cpp index 11b2def19d..fdec77d5ef 100644 --- a/decompiler/Function/Function.cpp +++ b/decompiler/Function/Function.cpp @@ -683,10 +683,6 @@ std::shared_ptr Function::get_basic_op_at_instr(int idx) { return basic_ops.at(instruction_to_basic_op.at(idx)); } -const TypeMap& Function::get_typemap_by_instr_idx(int idx) { - return basic_op_typemaps.at(instruction_to_basic_op.at(idx)); -} - int Function::get_basic_op_count() { return basic_ops.size(); } @@ -709,4 +705,43 @@ int Function::get_reginfo_basic_op_count() { } } return count; +} + +/*! + * Topological sort of basic blocks. + * Returns a valid ordering + a list of blocks that you can't reach and therefore + * aren't in the ordering. 
+ */
+BlockTopologicalSort Function::bb_topo_sort() {
+  BlockTopologicalSort result;
+  assert(!basic_blocks.empty());
+
+  // Block 0 is where the walk starts.  Mark it as seen up front: otherwise a branch back to
+  // block 0 would visit it a second time, and block 0 would be listed as unreachable below.
+  std::unordered_set<int> visit_set = {0};
+  std::vector<int> visit_queue = {0};
+
+  while (!visit_queue.empty()) {
+    // let's visit the most recently added:
+    auto to_visit = visit_queue.back();
+    visit_queue.pop_back();
+    result.vist_order.push_back(to_visit);
+
+    auto& block = basic_blocks.at(to_visit);
+    for (auto next : {block.succ_branch, block.succ_ft}) {
+      // -1 successors are skipped; insert().second is true only the first time a block is seen.
+      if (next != -1 && visit_set.insert(next).second) {
+        visit_queue.push_back(next);
+      }
+    }
+  }
+
+  // every block that was never queued is not part of the ordering.
+  for (int i = 0; i < int(basic_blocks.size()); i++) {
+    if (visit_set.find(i) == visit_set.end()) {
+      result.unreachable.insert(i);
+    }
+  }
+
+  return result;
 }
\ No newline at end of file
diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index 1455a957a4..34e1f47bcb 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -13,8 +13,6 @@ #include "common/type_system/TypeSpec.h" class DecompilerTypeSystem; -// Map of what type is in each register. 
-using TypeMap = std::unordered_map; class IR_Atomic; class IR; @@ -65,11 +63,6 @@ struct FunctionName { } }; -class BasicOpTypeInfo { - public: - std::unordered_map all_reg_types; -}; - class Function { public: Function(int _start_word, int _end_word); @@ -79,16 +72,15 @@ class Function { void find_type_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts); void add_basic_op(std::shared_ptr op, int start_instr, int end_instr); bool has_basic_ops() { return !basic_ops.empty(); } - bool has_typemaps() { return !basic_op_typemaps.empty(); } bool instr_starts_basic_op(int idx); std::shared_ptr get_basic_op_at_instr(int idx); - const TypeMap& get_typemap_by_instr_idx(int idx); int get_basic_op_count(); int get_failed_basic_op_count(); int get_reginfo_basic_op_count(); - void run_type_analysis(const TypeSpec& my_type, + bool run_type_analysis(const TypeSpec& my_type, DecompilerTypeSystem& dts, LinkedObjectFile& file); + BlockTopologicalSort bb_topo_sort(); TypeSpec type; @@ -117,6 +109,8 @@ class Function { std::string warnings; bool contains_asm_ops = false; + bool attempted_type_analysis = false; + struct Prologue { bool decoded = false; // have we removed the prologue from basic blocks? 
int total_stack_usage = -1; @@ -150,7 +144,6 @@ class Function { private: void check_epilogue(const LinkedObjectFile& file); - std::vector basic_op_typemaps; std::unordered_map instruction_to_basic_op; std::unordered_map basic_op_to_instruction; };
diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp
new file mode 100644
index 0000000000..5f49a2aac0
--- /dev/null
+++ b/decompiler/Function/TypeAnalysis.cpp
@@ -0,0 +1,117 @@
+#include "TypeAnalysis.h"
+#include "decompiler/IR/IR.h"
+#include "third-party/fmt/core.h"
+#include "decompiler/config.h"
+
+namespace {
+/*!
+ * Build the register types on entry to a function of type f_ts.
+ * Every entry of f_ts except the last is an argument; argument i lands in the i-th register of
+ * a0, a1, a2, a3, t0, t1, t2, t3.  The last entry is not an argument and gets no register.
+ */
+TypeState construct_initial_typestate(const TypeSpec& f_ts) {
+  TypeState result;
+  int goal_args[] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::T0, Reg::T1, Reg::T2, Reg::T3};
+  assert(f_ts.base_type() == "function");
+  assert(f_ts.arg_count() >= 1);
+  // goal_args holds 8 registers and the loop skips the final entry, so 8 + 1 entries still fit.
+  assert(f_ts.arg_count() <= 8 + 1);
+  for (int i = 0; i < int(f_ts.arg_count()) - 1; i++) {
+    auto reg_id = goal_args[i];
+    auto reg_type = f_ts.get_arg(i);
+    result.gpr_types[reg_id].ts = reg_type;
+    result.gpr_types[reg_id].kind = TP_Type::OBJECT_OF_TYPE;
+  }
+  return result;
+}
+}  // namespace
+
+/*!
+ * Propagate types through every basic op of this function, repeating for as long as merging a
+ * block's output into its successors reports a change.  my_type is this function's type; its
+ * last entry is compared against whatever the final basic op leaves in v0, and a mismatch is
+ * recorded as a warning only.
+ * Returns false (and records a warning) as soon as propagation throws for any op.
+ */
+bool Function::run_type_analysis(const TypeSpec& my_type,
+                                 DecompilerTypeSystem& dts,
+                                 LinkedObjectFile& file) {
+  // STEP 0 - setup settings
+  dts.type_prop_settings.reset();
+  if (get_config().pair_functions_by_name.find(guessed_name.to_string()) !=
+      get_config().pair_functions_by_name.end()) {
+    dts.type_prop_settings.allow_pair = true;
+  }
+
+  if (guessed_name.kind == FunctionName::FunctionKind::METHOD) {
+    dts.type_prop_settings.current_method_type = guessed_name.type_name;
+  }
+
+  // STEP 1 - get the topo sort.
+  auto order = bb_topo_sort();
+  // fmt::print("blocks: {}\n ", basic_blocks.size());
+  // for (auto x : order.vist_order) {
+  //   fmt::print("{} ", x);
+  // }
+  // fmt::print("\n");
+
+  // STEP 2 - establish visit order
+  assert(!order.vist_order.empty());
+  assert(order.vist_order.front() == 0);
+
+  // STEP 3 - initialize type state.
+  basic_blocks.at(0).init_types = construct_initial_typestate(my_type);
+
+  // STEP 4 - loop while types are changing
+  bool run_again = true;
+  while (run_again) {
+    run_again = false;
+    // each block in order now.
+    for (auto block_id : order.vist_order) {
+      auto& block = basic_blocks.at(block_id);
+      TypeState* init_types = &block.init_types;
+      for (int op_id = block.start_basic_op; op_id < block.end_basic_op; op_id++) {
+        auto& op = basic_ops.at(op_id);
+
+        // while the implementation of propagate_types is in progress, it may throw
+        // for unimplemented cases. Eventually this try/catch should be removed.
+        try {
+          op->propagate_types(*init_types, file, dts);
+        } catch (const std::runtime_error& e) {
+          fmt::print("Type prop fail: {}\n\n\n", e.what());
+          warnings += "Type prop attempted and failed. ";
+          return false;
+        }
+
+        // todo, set run again??
+
+        // for the next op...
+        init_types = &op->end_types;
+      }
+
+      // propagate the types: for each possible succ
+      for (auto succ_block_id : {block.succ_ft, block.succ_branch}) {
+        if (succ_block_id != -1) {
+          auto& succ_block = basic_blocks.at(succ_block_id);
+          // set types to LCA (current, new)
+          if (dts.tp_lca(&succ_block.init_types, *init_types)) {
+            // if something changed, run again!
+            run_again = true;
+          }
+        }
+      }
+    }
+  }
+
+  // STEP 5 - check the return value.  With no basic ops there is nothing to check, and
+  // calling back() on an empty vector would be undefined behavior.
+  if (!basic_ops.empty()) {
+    const auto& last_op = basic_ops.back();
+    auto last_type = last_op->end_types.get(Register(Reg::GPR, Reg::V0)).as_typespec();
+    if (last_type != my_type.last_arg()) {
+      warnings += fmt::format("return type mismatch {} vs {}. ", last_type.print(),
+                              my_type.last_arg().print());
+    }
+  }
+
+  return true;
+}
\ No newline at end of file
diff --git a/decompiler/Function/TypeAnalysis.h b/decompiler/Function/TypeAnalysis.h new file mode 100644 index 0000000000..7c47610006 --- /dev/null +++ b/decompiler/Function/TypeAnalysis.h @@ -0,0 +1,2 @@ +#pragma once +#include "Function.h" diff --git a/decompiler/IR/BasicOpBuilder.cpp b/decompiler/IR/BasicOpBuilder.cpp index 02696b3928..e97cc35cd9 100644 --- a/decompiler/IR/BasicOpBuilder.cpp +++ b/decompiler/IR/BasicOpBuilder.cpp @@ -179,6 +179,13 @@ std::shared_ptr try_or(Instruction& instr, int idx) { op->write_regs.push_back(dest); op->reg_info_set = true; return op; + } else if (is_gpr_3(instr, InstructionKind::OR, {}, make_gpr(Reg::R0), make_gpr(Reg::R0))) { + auto dest = instr.get_dst(0).get_reg(); + auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(dest, idx), + std::make_shared(0)); + op->write_regs.push_back(dest); + op->reg_info_set = true; + return op; } else if (is_gpr_3(instr, InstructionKind::OR, {}, {}, make_gpr(Reg::R0))) { // set register from register : or dest, source, r0 auto dest = instr.get_dst(0).get_reg(); @@ -726,6 +733,20 @@ std::shared_ptr try_daddiu(Instruction& instr, int idx) { op->write_regs.push_back(instr.get_dst(0).get_reg()); op->reg_info_set = true; return op; + } else if (instr.kind == InstructionKind::DADDIU && instr.get_src(0).is_reg(make_gpr(Reg::S7)) && + instr.get_src(1).is_imm() && instr.get_src(1).get_imm() == -10) { + auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(instr.get_dst(0).get_reg(), idx), + std::make_shared()); + op->write_regs.push_back(instr.get_dst(0).get_reg()); + op->reg_info_set = true; + return op; + } else if (instr.kind == InstructionKind::DADDIU && instr.get_src(0).is_reg(make_gpr(Reg::S7)) && + instr.get_src(1).is_imm() && instr.get_src(1).get_imm() == -32768) { + auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(instr.get_dst(0).get_reg(), idx), + 
std::make_shared("__START-OF-TABLE__")); + op->write_regs.push_back(instr.get_dst(0).get_reg()); + op->reg_info_set = true; + return op; } else if (instr.kind == InstructionKind::DADDIU && instr.get_src(0).is_reg(make_gpr(Reg::FP)) && instr.get_src(1).kind == InstructionAtom::LABEL) { auto op = make_set_atomic(IR_Set_Atomic::REG_64, make_reg(instr.get_dst(0).get_reg(), idx), @@ -745,8 +766,16 @@ std::shared_ptr try_daddiu(Instruction& instr, int idx) { } std::shared_ptr try_daddu(Instruction& instr, int idx) { - if (is_gpr_3(instr, InstructionKind::DADDU, {}, {}, {}) && - !instr.get_src(0).is_reg(make_gpr(Reg::S7)) && !instr.get_src(1).is_reg(make_gpr(Reg::S7))) { + if (is_gpr_3(instr, InstructionKind::DADDU, {}, make_gpr(Reg::R0), {})) { + auto op = make_set_atomic( + IR_Set_Atomic::REG_64, make_reg(instr.get_dst(0).get_reg(), idx), + std::make_shared(IR_IntMath2::ADD, make_reg(instr.get_src(1).get_reg(), idx), + std::make_shared(0))); + op->update_reginfo_self(1, 1, 0); + return op; + } else if (is_gpr_3(instr, InstructionKind::DADDU, {}, {}, {}) && + !instr.get_src(0).is_reg(make_gpr(Reg::S7)) && + !instr.get_src(1).is_reg(make_gpr(Reg::S7))) { auto op = make_set_atomic( IR_Set_Atomic::REG_64, make_reg(instr.get_dst(0).get_reg(), idx), std::make_shared(IR_IntMath2::ADD, make_reg(instr.get_src(0).get_reg(), idx), diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index a01d27ea21..09b25155ea 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -78,11 +78,17 @@ std::string IR_Atomic::print_with_reguse(const LinkedObjectFile& file) const { std::string IR_Atomic::print_with_types(const TypeState& init_types, const LinkedObjectFile& file) const { - std::string result = print(file); + std::string result; + + for (auto& warning : warnings) { + result += ";; warn: " + warning + "\n"; + } + result += print(file); if (result.length() < 40) { result.append(40 - result.length(), ' '); } result += " ;; "; + auto read_mask = regs_to_gpr_mask(read_regs); auto 
write_mask = regs_to_gpr_mask(write_regs); @@ -362,6 +368,24 @@ void IR_SymbolValue::get_children(std::vector>* output) cons (void)output; } +goos::Object IR_EmptyPair::to_form(const LinkedObjectFile& file) const { + (void)file; + return pretty_print::to_symbol("'()"); +} + +void IR_EmptyPair::get_children(std::vector>* output) const { + (void)output; +} + +TP_Type IR_EmptyPair::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)file; + (void)dts; + return TP_Type(TypeSpec("pair")); +} + goos::Object IR_StaticAddress::to_form(const LinkedObjectFile& file) const { // return pretty_print::build_list(pretty_print::to_symbol("&"), file.get_label_name(label_id)); return pretty_print::to_symbol(file.get_label_name(label_id)); diff --git a/decompiler/IR/IR.h b/decompiler/IR/IR.h index 816266722c..cb49eb2ea9 100644 --- a/decompiler/IR/IR.h +++ b/decompiler/IR/IR.h @@ -8,6 +8,7 @@ #include "decompiler/Disasm/Register.h" #include "common/type_system/TypeSpec.h" #include "decompiler/util/DecompilerTypeSystem.h" +#include "decompiler/util/TP_Type.h" class LinkedObjectFile; class DecompilerTypeSystem; @@ -16,17 +17,16 @@ namespace goos { class Object; } -// Map of what type is in each register. 
-using TypeMap = std::unordered_map; - class IR { public: virtual goos::Object to_form(const LinkedObjectFile& file) const = 0; std::vector> get_all_ir(LinkedObjectFile& file) const; std::string print(const LinkedObjectFile& file) const; virtual void get_children(std::vector>* output) const = 0; - bool is_basic_op = false; + virtual TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts); }; class IR_Atomic : public virtual IR { @@ -35,7 +35,12 @@ class IR_Atomic : public virtual IR { bool reg_info_set = false; TypeState end_types; // types at the end of this instruction + std::vector warnings; + void warn(const std::string& str) { warnings.emplace_back(str); } + virtual void propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts); std::string print_with_types(const TypeState& init_types, const LinkedObjectFile& file) const; std::string print_with_reguse(const LinkedObjectFile& file) const; }; @@ -59,6 +64,9 @@ class IR_Register : public virtual IR { void get_children(std::vector>* output) const override; Register reg; int instr_idx = -1; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_Set : public virtual IR { @@ -90,8 +98,10 @@ class IR_Set_Atomic : public IR_Set, public IR_Atomic { template void update_reginfo_self(int n_dest, int n_src, int n_clobber); - void update_reginfo_regreg(); + void propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_IntMath2; @@ -119,6 +129,9 @@ class IR_Store_Atomic : public IR_Set_Atomic { int size; goos::Object to_form(const LinkedObjectFile& file) const override; void update_reginfo_self(int n_dest, int n_src, int n_clobber); + void propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_Symbol : public 
virtual IR { @@ -127,6 +140,9 @@ class IR_Symbol : public virtual IR { std::string name; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_SymbolValue : public virtual IR { @@ -135,6 +151,19 @@ class IR_SymbolValue : public virtual IR { std::string name; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; +}; + +class IR_EmptyPair : public virtual IR { + public: + explicit IR_EmptyPair() = default; + goos::Object to_form(const LinkedObjectFile& file) const override; + void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_StaticAddress : public virtual IR { @@ -143,6 +172,9 @@ class IR_StaticAddress : public virtual IR { int label_id = -1; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_Load : public virtual IR { @@ -155,6 +187,9 @@ class IR_Load : public virtual IR { std::shared_ptr location; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_FloatMath2 : public virtual IR { @@ -165,6 +200,9 @@ class IR_FloatMath2 : public virtual IR { std::shared_ptr arg0, arg1; goos::Object to_form(const LinkedObjectFile& file) 
const override; void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_FloatMath1 : public virtual IR { @@ -202,6 +240,9 @@ class IR_IntMath2 : public virtual IR { std::shared_ptr arg0, arg1; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_IntMath1 : public virtual IR { @@ -211,6 +252,9 @@ class IR_IntMath1 : public virtual IR { std::shared_ptr arg; goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_Call : public virtual IR { @@ -224,6 +268,9 @@ class IR_Call : public virtual IR { class IR_Call_Atomic : public virtual IR_Call, public IR_Atomic { public: IR_Call_Atomic() = default; + void propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_IntegerConstant : public virtual IR { @@ -232,6 +279,9 @@ class IR_IntegerConstant : public virtual IR { explicit IR_IntegerConstant(int64_t _value) : value(_value) {} goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; struct BranchDelay { @@ -254,6 +304,8 @@ struct BranchDelay { std::vector read_regs; std::vector write_regs; std::vector clobber_regs; + + void type_prop(TypeState& output, const LinkedObjectFile& file, DecompilerTypeSystem& dts); }; struct Condition { @@ -335,6 +387,9 @@ class IR_Branch_Atomic : 
public virtual IR_Branch, public IR_Atomic { : IR_Branch(std::move(_condition), _dest_label_idx, std::move(_branch_delay), _likely) {} // note - counts only for the condition. void update_reginfo_self(int n_dst, int n_src, int n_clobber); + void propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_Compare : public virtual IR { @@ -345,6 +400,9 @@ class IR_Compare : public virtual IR { goos::Object to_form(const LinkedObjectFile& file) const override; void get_children(std::vector>* output) const override; + TP_Type get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_Nop : public virtual IR { @@ -357,6 +415,9 @@ class IR_Nop : public virtual IR { class IR_Nop_Atomic : public IR_Nop, public IR_Atomic { public: IR_Nop_Atomic() = default; + void propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_Suspend : public virtual IR, public IR_Atomic {
@@ -366,6 +427,17 @@ class IR_Suspend : public virtual IR, public IR_Atomic {
   void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
 };
 
+class IR_Breakpoint_Atomic : public virtual IR_Atomic {
+  // class members are private by default; these must be public to be constructed and called.
+ public:
+  IR_Breakpoint_Atomic() = default;
+  goos::Object to_form(const LinkedObjectFile& file) const override;
+  void get_children(std::vector<std::shared_ptr<IR>>* output) const override;
+  void propagate_types(const TypeState& input,
+                       const LinkedObjectFile& file,
+                       DecompilerTypeSystem& dts) override;
+};
+
 class IR_Begin : public virtual IR {
  public:
   IR_Begin() = default;
@@ -484,6 +556,9 @@ class IR_AsmOp_Atomic : public virtual IR_AsmOp, public IR_Atomic { public: IR_AsmOp_Atomic(std::string _name) : IR_AsmOp(std::move(_name)) {} void set_reg_info(); + void propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) override; }; class IR_CMoveF : public virtual IR { diff --git 
a/decompiler/IR/IR_TypeAnalysis.cpp b/decompiler/IR/IR_TypeAnalysis.cpp new file mode 100644 index 0000000000..39342c76b6 --- /dev/null +++ b/decompiler/IR/IR_TypeAnalysis.cpp @@ -0,0 +1,635 @@ +#include "IR.h" +#include "decompiler/util/DecompilerTypeSystem.h" +#include "third-party/fmt/core.h" +#include "common/goos/Object.h" +#include "decompiler/util/TP_Type.h" +#include "decompiler/ObjectFile/LinkedObjectFile.h" + +namespace { +bool is_plain_type(const TP_Type& type, const TypeSpec& ts) { + return type.as_typespec() == ts; +} + +bool is_integer_type(const TP_Type& type) { + return is_plain_type(type, TypeSpec("int")) || is_plain_type(type, TypeSpec("uint")); +} + +/*! + * If first arg is unsigned, make the result unsigned. + * Otherwise signed. This is the default GOAL behavior I guess. + */ +TP_Type get_int_type(const TP_Type& one) { + if (is_plain_type(one, TypeSpec("uint"))) { + return one; + } else { + return TP_Type(TypeSpec("int")); + } +} + +struct RegOffset { + Register reg; + int offset; +}; + +bool get_as_reg_offset(const IR* ir, RegOffset* out) { + auto as_reg = dynamic_cast(ir); + if (as_reg) { + out->reg = as_reg->reg; + out->offset = 0; + return true; + } + + auto as_math = dynamic_cast(ir); + if (as_math && as_math->kind == IR_IntMath2::ADD) { + auto first_as_reg = dynamic_cast(as_math->arg0.get()); + auto second_as_const = dynamic_cast(as_math->arg1.get()); + if (first_as_reg && second_as_const) { + out->reg = first_as_reg->reg; + out->offset = second_as_const->value; + return true; + } + } + return false; +} +
+/*!
+ * Convert the kind of a register into the RegKind passed along with a reverse deref request.
+ * Only GPR and FPR are handled; anything else is an error.
+ */
+RegKind get_reg_kind(const Register& r) {
+  switch (r.get_kind()) {
+    case Reg::GPR:
+      return RegKind::GPR_64;
+    case Reg::FPR:
+      return RegKind::FLOAT;
+    default:
+      assert(false);
+      // with asserts compiled out, don't fall off the end of a non-void function.
+      throw std::runtime_error("get_reg_kind: register is neither a GPR nor an FPR");
+  }
+}
+} // namespace + +void IR_Atomic::propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)dts; + throw std::runtime_error( + fmt::format("Could not propagate types for {}, not yet 
implemented", print(file))); +} + +TP_Type IR::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)dts; + throw std::runtime_error( + fmt::format("Could not get expression types for {}, not yet implemented", print(file))); +} + +void IR_Set_Atomic::propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + // pass through types + end_types = input; + // modify as needed + switch (kind) { + case IR_Set::REG_64: + case IR_Set::LOAD: + case IR_Set::GPR_TO_FPR: + case IR_Set::FPR_TO_GPR64: + case IR_Set::REG_FLT: + case IR_Set::SYM_LOAD: { + auto as_reg = dynamic_cast(dst.get()); + assert(as_reg); + auto t = src->get_expression_type(input, file, dts); + end_types.get(as_reg->reg) = t; + } break; + + case IR_Set::SYM_STORE: { + auto as_reg = dynamic_cast(dst.get()); + assert(!as_reg); + return; + } + default: + throw std::runtime_error(fmt::format( + "Could not propagate types through IR_Set_Atomic, kind not handled {}", print(file))); + } +} + +TP_Type IR_Register::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)file; + (void)dts; + return input.get(reg); +} + +TP_Type IR_Load::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + auto as_static = dynamic_cast(location.get()); + if (as_static) { + if (kind == FLOAT) { + // loading static data with a FLOAT kind load (lwc1), assume result is a float. 
+ return TP_Type(dts.ts.make_typespec("float")); + } + + if (size == 8) { + // kinda hacky + if (kind == SIGNED) { + return TP_Type(dts.ts.make_typespec("int")); + } else if (kind == UNSIGNED) { + return TP_Type(dts.ts.make_typespec("uint")); + } + } + } + + RegOffset ro; + if (get_as_reg_offset(location.get(), &ro)) { + auto& input_type = input.get(ro.reg); + + if (input_type.kind == TP_Type::TYPE_OBJECT && ro.offset >= 16 && (ro.offset & 3) == 0 && + size == 4 && kind == UNSIGNED) { + // method get + auto method_id = (ro.offset - 16) / 4; + if (input_type.ts.base_type() == "object" && method_id == GOAL_NEW_METHOD) { + // remember that we're an object new. + auto method_info = dts.ts.lookup_method(input_type.ts.print(), method_id); + auto result = TP_Type(method_info.type.substitute_for_method_call(input_type.ts.print())); + result.kind = TP_Type::METHOD_NEW_OF_OBJECT; + return result; + } + auto method_info = dts.ts.lookup_method(input_type.ts.print(), method_id); + return TP_Type(method_info.type.substitute_for_method_call(input_type.ts.print())); + } + + if (input_type.kind == TP_Type::OBJECT_OF_TYPE && + input_type.as_typespec() == TypeSpec("type") && ro.offset >= 16 && (ro.offset & 3) == 0 && + size == 4 && kind == UNSIGNED) { + // method get + auto method_id = (ro.offset - 16) / 4; + auto method_info = dts.ts.lookup_method("object", method_id); + return TP_Type(method_info.type.substitute_for_method_call("object")); + } + + if (input_type.kind == TP_Type::OBJECT_OF_TYPE && + input_type.as_typespec() == TypeSpec("pointer")) { + // we got a plain pointer. let's just assume we're loading an integer. + // perhaps we should disable this feature by default on 4-byte loads if we're getting + // lots of false positives for loading pointers from plain pointers. 
+ switch (kind) { + case UNSIGNED: + switch (size) { + case 1: + return TP_Type(TypeSpec("uint")); + case 2: + return TP_Type(TypeSpec("uint")); + case 4: + return TP_Type(TypeSpec("uint")); + case 8: + return TP_Type(TypeSpec("uint")); + case 16: + return TP_Type(TypeSpec("uint")); + default: + assert(false); + } + break; + case SIGNED: + switch (size) { + case 1: + return TP_Type(TypeSpec("int")); + case 2: + return TP_Type(TypeSpec("int")); + case 4: + return TP_Type(TypeSpec("int")); + case 8: + return TP_Type(TypeSpec("int")); + case 16: + return TP_Type(TypeSpec("int")); + default: + assert(false); + } + break; + case FLOAT: + return TP_Type(TypeSpec("float")); + default: + assert(false); + } + } + + if (input_type.kind == TP_Type::PARTIAL_METHOD_TABLE_ACCESS && ro.offset == 16) { + // access method vtable + return TP_Type(TypeSpec("function")); + } else if (input_type.kind == TP_Type::OBJ_PLUS_PRODUCT) { + // note, we discard and completely ignore the stride here. + ReverseDerefInputInfo rd_in; + rd_in.mem_deref = true; + rd_in.input_type = input_type.ts; + rd_in.reg = get_reg_kind(ro.reg); // bleh + rd_in.offset = ro.offset; + rd_in.sign_extend = kind == SIGNED; + rd_in.load_size = size; + auto rd = dts.ts.get_reverse_deref_info(rd_in); + + if (rd.success) { + return TP_Type(coerce_to_reg_type(rd.result_type)); + } + } else { + if (input_type.as_typespec() == TypeSpec("object") && ro.offset == -4 && kind == UNSIGNED && + size == 4 && ro.reg.get_kind() == Reg::GPR) { + // get type of basic likely, but misrecognized as an object. + // occurs often in typecase-like structures because other possible types are "stripped". 
+ return TP_Type(TypeSpec("type")); + } + + // nice + ReverseDerefInputInfo rd_in; + rd_in.mem_deref = true; + rd_in.input_type = input_type.as_typespec(); + rd_in.reg = get_reg_kind(ro.reg); // bleh + rd_in.offset = ro.offset; + rd_in.sign_extend = kind == SIGNED; + rd_in.load_size = size; + + auto rd = dts.ts.get_reverse_deref_info(rd_in); + if (!rd.success && !dts.type_prop_settings.allow_pair) { + printf("input type is %s, offset is %d, sign %d size %d\n", + rd_in.input_type.print().c_str(), rd_in.offset, rd_in.sign_extend, rd_in.load_size); + throw std::runtime_error( + fmt::format("Could not get type of load: {}. Reverse Deref Failed.", print(file))); + } + + if (rd.success) { + return TP_Type(coerce_to_reg_type(rd.result_type)); + } + + if (dts.type_prop_settings.allow_pair) { + if (kind == SIGNED && size == 4 && + (input_type.as_typespec() == TypeSpec("object") || + input_type.as_typespec() == TypeSpec("pair"))) { + // pair access! + if (ro.offset == 2) { + return TP_Type(TypeSpec("pair")); + } else if (ro.offset == -2) { + return TP_Type(TypeSpec("object")); + } + } + } + } + } + + throw std::runtime_error( + fmt::format("Could not get type of load: {}. Not handled.", print(file))); +} + +TP_Type IR_FloatMath2::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)file; + + // regardless of input types, the output is going to be a float. + switch (kind) { + case DIV: + case MUL: + case ADD: + case SUB: + case MIN: + case MAX: + return TP_Type(dts.ts.make_typespec("float")); + default: + assert(false); + } +} + +TP_Type IR_IntMath2::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + auto arg0_type = arg0->get_expression_type(input, file, dts); + auto arg1_type = arg1->get_expression_type(input, file, dts); + + if (is_integer_type(arg0_type) && is_integer_type(arg1_type)) { + // case where both arguments are integers. 
+ // in this case we assume we're actually doing math. + switch (kind) { + case ADD: + case SUB: + case AND: + case OR: + case NOR: + case XOR: + // we don't know if we're signed or unsigned. so let's just go with the first type. + return get_int_type(arg0_type); + case MUL_SIGNED: + case DIV_SIGNED: + case RIGHT_SHIFT_ARITH: + case MOD_SIGNED: + case MIN_SIGNED: + case MAX_SIGNED: + // result is going to be signed, regardless of inputs. + return TP_Type(TypeSpec("int")); + + case MUL_UNSIGNED: + case RIGHT_SHIFT_LOGIC: + // result is going to be unsigned, regardless of inputs. + return TP_Type(TypeSpec("uint")); + + case LEFT_SHIFT: { + // multiply! + auto as_const = dynamic_cast(arg1.get()); + if (as_const) { + // shift by constant integer. could be accessing the method array. + TP_Type result; + result.kind = TP_Type::PRODUCT; + result.ts = get_int_type(arg0_type).ts; + result.multiplier = (1 << as_const->value); + return result; + } else { + // normal variable shift. + return get_int_type(arg0_type); + } + } + default: + break; + } + } + + if (kind == ADD && arg0_type.kind == TP_Type::PRODUCT && arg1_type.is_object_of_type()) { + // access the methods! + return TP_Type::make_partial_method_table_access(); + } + + auto a1_const = dynamic_cast(arg1.get()); + if (a1_const && kind == ADD && arg0_type.kind == TP_Type::OBJECT_OF_TYPE) { + // access a field. 
+ ReverseDerefInputInfo rd_in; + rd_in.mem_deref = false; + rd_in.input_type = arg0_type.as_typespec(); + rd_in.offset = a1_const->value; + rd_in.load_size = 0; + auto rd = dts.ts.get_reverse_deref_info(rd_in); + + if (rd.success) { + return TP_Type(coerce_to_reg_type(rd.result_type)); + } + } + + if (kind == ADD && is_integer_type(arg0_type) && arg1_type.kind == TP_Type::OBJECT_OF_TYPE) { + // product + object with multiplier 1 (access array of bytes for example) + TP_Type result; + result.kind = TP_Type::OBJ_PLUS_PRODUCT; + result.ts = arg1_type.as_typespec(); + result.multiplier = 1; + return result; + } + + if (kind == ADD && arg0_type.kind == TP_Type::PRODUCT && + arg1_type.kind == TP_Type::OBJECT_OF_TYPE) { + TP_Type result; + result.kind = TP_Type::OBJ_PLUS_PRODUCT; + result.ts = arg1_type.as_typespec(); + result.multiplier = arg0_type.multiplier; + return result; + } + + if ((arg0_type.as_typespec() == TypeSpec("object") || + arg0_type.as_typespec() == TypeSpec("pair")) && + is_integer_type(arg1_type)) { + // boxed object tag trick + return TP_Type(TypeSpec("int")); + } + + if (dts.ts.typecheck(TypeSpec("pointer"), arg0_type.as_typespec(), "", false, false) && + is_integer_type(arg1_type)) { + return arg0_type; + } + + throw std::runtime_error( + fmt::format("Can't get_expression_type on this IR_IntMath2: {}, args {} and {}", print(file), + arg0_type.print(), arg1_type.print())); +} + +void BranchDelay::type_prop(TypeState& output, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)dts; + switch (kind) { + case DSLLV: { + // I think this is only used in ash, in which case the output should be an int/uint + // welll + auto dst = dynamic_cast(destination.get()); + assert(dst); + auto src = dynamic_cast(source.get()); + assert(src); + if (is_plain_type(output.get(src->reg), TypeSpec("uint"))) { + // todo, this won't catch child uint types. I think this doesn't matter though. 
+ output.get(dst->reg) = TP_Type(TypeSpec("uint")); + } + output.get(dst->reg) = TP_Type(TypeSpec("int")); + } break; + case NEGATE: { + auto dst = dynamic_cast(destination.get()); + assert(dst); + output.get(dst->reg) = TP_Type(TypeSpec("int")); + } break; + case SET_REG_FALSE: { + auto dst = dynamic_cast(destination.get()); + assert(dst); + output.get(dst->reg).kind = TP_Type::FALSE; + } break; + case SET_REG_REG: { + auto dst = dynamic_cast(destination.get()); + assert(dst); + auto src = dynamic_cast(source.get()); + assert(src); + output.get(dst->reg) = output.get(src->reg); + break; + } + case SET_REG_TRUE: { + auto dst = dynamic_cast(destination.get()); + assert(dst); + output.get(dst->reg) = TP_Type(TypeSpec("symbol")); + } break; + + case SET_BINTEGER: { + auto dst = dynamic_cast(destination.get()); + assert(dst); + output.get(dst->reg) = TP_Type::make_type_object("binteger"); + } break; + + case SET_PAIR: { + auto dst = dynamic_cast(destination.get()); + assert(dst); + output.get(dst->reg) = TP_Type::make_type_object("pair"); + } break; + + case NOP: + break; + + default: + throw std::runtime_error("Unhandled branch delay in type_prop: " + to_form(file).print()); + } +} + +void IR_Branch_Atomic::propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + // pass through types + end_types = input; + branch_delay.type_prop(end_types, file, dts); + // todo clobbers. +} + +TP_Type IR_IntMath1::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)dts; + auto arg_type = arg->get_expression_type(input, file, dts); + switch (kind) { + case NEG: + // if we negate a thing, let's just make it a signed integer. 
+ return TP_Type(TypeSpec("int")); + case NOT: + return get_int_type(arg_type); + default: + throw std::runtime_error("IR_IntMath1::get_expression_type case not handled: " + + to_form(file).print()); + } +} + +TP_Type IR_SymbolValue::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)file; + if (name == "#f") { + TP_Type result; + result.kind = TP_Type::FALSE; + return result; + } else if (name == "__START-OF-TABLE__") { + return TP_Type(TypeSpec("uint")); + } + + auto type = dts.symbol_types.find(name); + if (type == dts.symbol_types.end()) { + throw std::runtime_error("Don't have the type of symbol " + name); + } + + if (type->second == TypeSpec("type")) { + // let's remember what we got this from. + return TP_Type::make_type_object(name); + } + + return TP_Type(type->second); +} + +TP_Type IR_Symbol::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)file; + (void)dts; + if (name == "#f") { + TP_Type result; + result.kind = TP_Type::FALSE; + return result; + } + + return TP_Type(TypeSpec("symbol")); +} + +TP_Type IR_IntegerConstant::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)file; + (void)dts; + return TP_Type(TypeSpec("int")); +} + +TP_Type IR_Compare::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)file; + (void)dts; + return TP_Type(TypeSpec("symbol")); +} + +void IR_Nop_Atomic::propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)file; + (void)dts; + end_types = input; +} + +void IR_Call_Atomic::propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)file; + (void)dts; + // todo clobber + end_types = input; + + auto in_tp = 
input.get(Register(Reg::GPR, Reg::T9)); + if (in_tp.kind == TP_Type::METHOD_NEW_OF_OBJECT && + !dts.type_prop_settings.current_method_type.empty()) { + end_types.get(Register(Reg::GPR, Reg::V0)) = + TP_Type(dts.type_prop_settings.current_method_type); + return; + } + auto in_type = in_tp.as_typespec(); + if (in_type.base_type() != "function") { + throw std::runtime_error("Called something that wasn't a function: " + in_type.print()); + } + + if (in_type.arg_count() < 1) { + throw std::runtime_error("Called a function, but we don't know its type"); + } + + end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type(in_type.last_arg()); +} + +void IR_Store_Atomic::propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)file; + (void)dts; + end_types = input; +} + +TP_Type IR_StaticAddress::get_expression_type(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)input; + (void)dts; + auto label = file.labels.at(label_id); + if ((label.offset & 0xf) == 4) { + // it's a basic! probably. 
+ const auto& word = file.words_by_seg.at(label.target_segment).at((label.offset - 4) / 4); + if (word.kind == LinkedWord::TYPE_PTR) { + return TP_Type(TypeSpec(word.symbol_name)); + } + } + + throw std::runtime_error("IR_StaticAddress couldn't figure out the type: " + label.name); +} + +void IR_AsmOp_Atomic::propagate_types(const TypeState& input, + const LinkedObjectFile& file, + DecompilerTypeSystem& dts) { + (void)file; + (void)dts; + auto dst_reg = dynamic_cast(dst.get()); + end_types = input; + if (dst_reg) { + if (name == "daddu") { + end_types.get(dst_reg->reg) = TP_Type(TypeSpec("uint")); + } + } +} \ No newline at end of file diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index ac87d85f6d..8bbcbc7238 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -548,13 +548,13 @@ std::string LinkedObjectFile::to_asm_json(const std::string& obj_file_name) { if (func.has_basic_ops() && func.instr_starts_basic_op(i)) { op["basic_op"] = func.get_basic_op_at_instr(i)->print(*this); - if (func.has_typemaps()) { - auto& tm = func.get_typemap_by_instr_idx(i); - auto& json_type_map = op["type_map"]; - for (auto& kv : tm) { - json_type_map[kv.first.to_charp()] = kv.second.print(); - } - } + // if (func.has_typemaps()) { + // auto& tm = func.get_typemap_by_instr_idx(i); + // auto& json_type_map = op["type_map"]; + // for (auto& kv : tm) { + // json_type_map[kv.first.to_charp()] = kv.second.print(); + // } + // } } for (int iidx = 0; iidx < instr.n_src; iidx++) { @@ -633,31 +633,6 @@ std::string LinkedObjectFile::print_function_disassembly(Function& func, } } } - - // print type map - if (func.has_typemaps()) { - if (line.length() < 60) { - line.append(60 - line.length(), ' '); - } - line += " tm: "; - auto& tm = func.get_typemap_by_instr_idx(i); - bool added = false; - for (auto reg_kind : {Reg::RegisterKind::GPR, Reg::RegisterKind::FPR}) { - for (int reg_idx = 0; 
reg_idx < 32; reg_idx++) { - auto gpr = Register(reg_kind, reg_idx); - auto kv = tm.find(gpr); - if (kv != tm.end()) { - added = true; - line += fmt::format("{}: {}, ", gpr.to_charp(), kv->second.print()); - } - } - } - - if (added) { - line.pop_back(); - line.pop_back(); - } - } } result += line + "\n"; } @@ -808,6 +783,9 @@ std::string LinkedObjectFile::print_type_analysis_debug() { result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; result += "; .function " + func.guessed_name.to_string() + "\n"; result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; + if (!func.warnings.empty()) { + result += ";; WARNING: " + func.warnings + "\n"; + } for (auto& block : func.basic_blocks) { result += "\n"; @@ -820,9 +798,14 @@ std::string LinkedObjectFile::print_type_analysis_debug() { result += " "; // result += func.basic_ops.at(i)->print_with_reguse(*this); // result += func.basic_ops.at(i)->print(*this); - result += func.basic_ops.at(i)->print_with_types(*init_types, *this); - result += "\n"; - init_types = &func.basic_ops.at(i)->end_types; + if (func.attempted_type_analysis) { + result += func.basic_ops.at(i)->print_with_types(*init_types, *this); + result += "\n"; + init_types = &func.basic_ops.at(i)->end_types; + } else { + result += func.basic_ops.at(i)->print(*this); + result += "\n"; + } } } } diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index 529d6ec0c9..f163ccf289 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -846,6 +846,7 @@ void ObjectFileDB::analyze_functions() { int non_asm_funcs = 0; int successful_cfg_irs = 0; int successful_type_analysis = 0; + int attempted_type_analysis = 0; std::map> unresolved_by_length; @@ -915,26 +916,50 @@ void ObjectFileDB::analyze_functions() { } // type analysis - if (func.guessed_name.kind == FunctionName::FunctionKind::GLOBAL) { - // we're a global named function. 
This means we're stored in a symbol - auto kv = dts.symbol_types.find(func.guessed_name.function_name); - if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) { - if (kv->second.base_type() != "function") { - spdlog::error("Found a function named {} but the symbol has type {}", - func.guessed_name.to_string(), kv->second.print()); - assert(false); + if (get_config().function_type_prop) { + if (func.guessed_name.kind == FunctionName::FunctionKind::GLOBAL) { + // we're a global named function. This means we're stored in a symbol + auto kv = dts.symbol_types.find(func.guessed_name.function_name); + if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) { + if (kv->second.base_type() != "function") { + spdlog::error("Found a function named {} but the symbol has type {}", + func.guessed_name.to_string(), kv->second.print()); + assert(false); + } + // GOOD! + func.type = kv->second; + func.attempted_type_analysis = true; + attempted_type_analysis++; + spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(), + kv->second.print()); + if (func.run_type_analysis(kv->second, dts, data.linked_data)) { + successful_type_analysis++; + } } - // GOOD! - func.type = kv->second; - - /* - spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(), - kv->second.print()); - func.run_type_analysis(kv->second, dts, data.linked_data); - */ - - if (func.has_typemaps()) { - successful_type_analysis++; + } else if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) { + // it's a method. + try { + auto info = + dts.ts.lookup_method(func.guessed_name.type_name, func.guessed_name.method_id); + if (info.type.arg_count() >= 1) { + if (info.type.base_type() != "function") { + spdlog::error("Found a method named {} but the symbol has type {}", + func.guessed_name.to_string(), info.type.print()); + assert(false); + } + // GOOD! 
+ func.type = info.type.substitute_for_method_call(func.guessed_name.type_name); + func.attempted_type_analysis = true; + attempted_type_analysis++; + spdlog::info("Type Analysis on {} {}", func.guessed_name.to_string(), + func.type.print()); + if (func.run_type_analysis(func.type, dts, data.linked_data)) { + successful_type_analysis++; + } + } + + } catch (std::runtime_error& e) { + // failed to lookup method info } } } @@ -974,6 +999,11 @@ void ObjectFileDB::analyze_functions() { 100.f * float(total_reginfo_ops) / float(total_basic_ops)); spdlog::info(" {}/{} cfgs converted to ir ({:.3f}%)", successful_cfg_irs, non_asm_funcs, 100.f * float(successful_cfg_irs) / float(non_asm_funcs)); + spdlog::info(" {}/{} functions attempted type analysis ({:.2f}%)", attempted_type_analysis, + non_asm_funcs, 100.f * float(attempted_type_analysis) / float(non_asm_funcs)); + spdlog::info(" {}/{} functions that attempted type analysis succeeded ({:.2f}%)", + successful_type_analysis, attempted_type_analysis, + 100.f * float(successful_type_analysis) / float(attempted_type_analysis)); spdlog::info(" {}/{} functions passed type analysis ({:.2f}%)\n", successful_type_analysis, non_asm_funcs, 100.f * float(successful_type_analysis) / float(non_asm_funcs)); diff --git a/decompiler/config.cpp b/decompiler/config.cpp index 8d1a619a91..b9c302de5f 100644 --- a/decompiler/config.cpp +++ b/decompiler/config.cpp @@ -33,6 +33,7 @@ void set_config(const std::string& path_to_config_file) { gConfig.process_game_count = cfg.at("process_game_count").get(); gConfig.dump_objs = cfg.at("dump_objs").get(); gConfig.write_func_json = cfg.at("write_func_json").get(); + gConfig.function_type_prop = cfg.at("function_type_prop").get(); std::vector asm_functions_by_name = cfg.at("asm_functions_by_name").get>(); @@ -40,6 +41,12 @@ void set_config(const std::string& path_to_config_file) { gConfig.asm_functions_by_name.insert(x); } + std::vector pair_functions_by_name = + cfg.at("pair_functions_by_name").get>(); 
+ for (const auto& x : pair_functions_by_name) { + gConfig.pair_functions_by_name.insert(x); + } + auto bad_inspect = cfg.at("types_with_bad_inspect_methods").get>(); for (const auto& x : bad_inspect) { gConfig.bad_inspect_types.insert(x); diff --git a/decompiler/config.h b/decompiler/config.h index 0f502743d2..209df20fc6 100644 --- a/decompiler/config.h +++ b/decompiler/config.h @@ -26,7 +26,9 @@ struct Config { bool process_game_count = false; bool dump_objs = false; bool write_func_json = false; + bool function_type_prop = false; std::unordered_set asm_functions_by_name; + std::unordered_set pair_functions_by_name; // ... }; diff --git a/decompiler/config/all-types.gc b/decompiler/config/all-types.gc index 0e7e10af8d..25aa7fddda 100644 --- a/decompiler/config/all-types.gc +++ b/decompiler/config/all-types.gc @@ -6,33 +6,70 @@ ;; TYPES ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; to move +(define-extern name= (function basic basic symbol)) ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;;; KERNEL TYPES ;;;;;;;;;;;;;;;;;;;;;; +;;;; BUILT-IN / C TYPES ;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~; +;; built-in +(define-extern symbol type) +(define-extern object type) +(define-extern #f symbol) +(define-extern #t symbol) +(define-extern integer type) +(define-extern structure type) +(define-extern pair type) +(define-extern number type) +(define-extern binteger type) +(define-extern function type) +(define-extern array type) +(define-extern basic type) +(define-extern type type) +(define-extern string type) +(define-extern uint8 type) +(define-extern int8 type) +(define-extern int16 type) +(define-extern uint32 type) +(define-extern int32 type) +(define-extern float type) +(define-extern nothing (function none)) -; ;; gcommon -; (deftype array (UNKNOWN) -; () -; :method-count-assert 0 -; :size-assert #x0 -; :flag-assert #x0 -; ;; too many basic blocks -; ) -; ;; 
gcommon -; (deftype vec4s (uint128) -; () -; :method-count-assert 9 -; :size-assert #x10 -; :flag-assert #x900000010 -; ;; likely a bitfield type -; ) +;; C +(define-extern dgo-load (function string kheap int int none)) + +;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; GCOMMON ;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~; + +(define-extern identity (function object object)) +(define-extern 1/ (function float float)) +(define-extern + (function int int int)) +(define-extern - (function int int int)) +(define-extern * (function int int int)) +(define-extern / (function int int int)) +(define-extern ash (function int int int)) +(define-extern mod (function int int int)) +(define-extern rem (function int int int)) +(define-extern abs (function int int)) +(define-extern min (function int int int)) +(define-extern max (function int int int)) +(define-extern logior (function int int int)) +(define-extern logand (function int int int)) +(define-extern lognor (function int int int)) +(define-extern logxor (function int int int)) +(define-extern lognot (function int int)) +(define-extern false-func (function symbol)) +(define-extern true-func (function symbol)) +(define-extern format (function _varargs_ object)) +;; TODO - vec4s (or just forget it) -;; gcommon (deftype bfloat (basic) ((data float :offset-assert 4) ) @@ -41,7 +78,23 @@ :flag-assert #x900000008 ) -;; gcommon +(define-extern basic-type? (function basic type symbol)) +(define-extern type-type? 
(function type type symbol)) +(define-extern find-parent-method (function type int function)) +(define-extern ref (function object int object)) +(define-extern last (function object object)) +(define-extern member (function object object object)) +(define-extern nmember (function basic object object)) +(define-extern assoc (function object object object)) +(define-extern assoce (function object object object)) +(define-extern nassoc (function string object object)) +(define-extern nassoce (function string object object)) +(define-extern append! (function object object object)) +(define-extern delete! (function object object pair)) +(define-extern delete-car! (function object object object)) +(define-extern insert-cons! (function object object pair)) +(define-extern sort (function object (function object object object) object)) + (deftype inline-array-class (basic) ((length int32 :offset-assert 4) (allocated-length int32 :offset-assert 8) @@ -56,6 +109,49 @@ :flag-assert #x900000010 ) +(define-extern mem-copy! (function pointer pointer int pointer)) +(define-extern qmem-copy<-! (function pointer pointer int pointer)) +(define-extern qmem-copy->! (function pointer pointer int pointer)) +(define-extern mem-set32! (function pointer int int pointer)) +(define-extern mem-or! (function pointer pointer int pointer)) +;; quad-copy! +(define-extern fact (function int int)) +(define-extern *print-column* binteger) +(define-extern print (function object object)) +(define-extern printl (function object object)) +(define-extern inspect (function object object)) +(define-extern mem-print (function pointer int symbol)) +(define-extern *trace-list* pair) +(define-extern print-tree-bitmask (function int int symbol)) +(define-extern valid? (function object type basic basic object symbol)) + +;; has issues: +(define-extern breakpoint-range-set! 
function) + +; ;; gcommon +; (deftype array (UNKNOWN) +; () +; :method-count-assert 0 +; :size-assert #x0 +; :flag-assert #x0 +; ;; too many basic blocks +; ) + +; ;; gcommon +; (deftype vec4s (uint128) +; () +; :method-count-assert 9 +; :size-assert #x10 +; :flag-assert #x900000010 +; ;; likely a bitfield type +; ) + +;; gcommon + + +;; gcommon + + ;; gkernel-h (deftype kernel-context (basic) ((prevent-from-run uint32 :offset-assert 4) @@ -3348,11 +3444,7 @@ ;; lights-h (deftype light-array (array) - ((type basic :offset-assert 0) - (length int32 :offset-assert 4) - (allocated-length int32 :offset-assert 8) - (content-type basic :offset-assert 12) - ) + () :method-count-assert 9 :size-assert #x10 :flag-assert #x900000010 @@ -5804,11 +5896,7 @@ ;; prototype-h (deftype prototype-array-tie (array) - ((type basic :offset-assert 0) - (length int32 :offset-assert 4) - (allocated-length int32 :offset-assert 8) - (content-type basic :offset-assert 12) - ) + () :method-count-assert 10 :size-assert #x10 :flag-assert #xa00000010 @@ -30876,152 +30964,10 @@ ;; SYMBOLS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BUILTIN -(define-extern #f symbol) -(define-extern #t symbol) - -;; GCOMMON -(define-extern identity (function object object)) -(define-extern 1/ (function float float)) -(define-extern + (function int int int)) -(define-extern - (function int int int)) -(define-extern * (function int int int)) -(define-extern / (function int int int)) -(define-extern ash (function int int int)) -(define-extern mod (function int int int)) -(define-extern rem (function int int int)) -(define-extern abs (function int int)) -(define-extern min (function int int int)) -(define-extern max (function int int int)) -(define-extern logior (function int int int)) -(define-extern logand (function int int int)) -(define-extern lognor (function int int int)) -(define-extern logxor (function int int int)) -(define-extern lognot (function int int)) -(define-extern false-func (function symbol)) 
-(define-extern true-func (function symbol)) -;; format -;; vec4s -;; vec4s method 3 -;; vec4s method 2 -;; bfloat -;; bfloat method 3 -;; bfloat method 2 -;; type method 5 -;; basic-type? -;; type-type? -;; find-parent-method -;; pair method 4 -;; pair method 5 -;; last -;; member -;; nmember -;; assoc -;; assoce -;; nassoc -;; nassoce -;; append! -;; delete! -;; delete-car! -;; insert-cons! -;; sort -;; inline-array-class -;; inline-array-class method 3 -;; inline-array-class method 0 -;; inline-array-class method 4 -;; inline-array-class method 5 -;; array method 0 -;; array method 2 -;; array method 3 -;; array method 4 -;; array method 5 -;; mem-copy! -;; qmem-copy<-! -;; qmem-copy->! -;; mem-set32! -;; mem-or! -;; quad-copy! -;; fact -;; print-column -;; print -;; printl -;; inspect -;; mem-print -;; *trace-list* -;; print-tree-bitmask -;; breakpoint-range-set! -;; valid? - -(define-extern function type) -(define-extern insert-cons! (function object object pair)) -(define-extern basic-type? (function basic type symbol)) -;;(define-extern pair object) ;; unknown type - -(define-extern qmem-copy->! function) -(define-extern mem-set32! (function pointer int int pointer)) -(define-extern delete-car! (function object object object)) -;;(define-extern inline-array-class object) ;; unknown type -(define-extern mem-print function) -;;(define-extern vec4s object) ;; unknown type -(define-extern array type) -(define-extern print-tree-bitmask function) -(define-extern quad-copy! function) -(define-extern assoc (function object object object)) -(define-extern mem-or! function) -(define-extern fact function) -(define-extern assoce (function object object object)) -(define-extern basic type) -(define-extern type-type? (function type type symbol)) -(define-extern member (function object object object)) -(define-extern nmember (function basic object object)) -(define-extern breakpoint-range-set! 
function) -(define-extern nassoc function) -;;(define-extern _format object) ;; unknown type -(define-extern find-parent-method (function type int function)) -(define-extern nassoce function) -(define-extern ref (function object int object)) -;;(define-extern *print-column* object) ;; unknown type -(define-extern delete! (function object object pair)) -(define-extern qmem-copy<-! function) -(define-extern mem-copy! (function pointer pointer integer pointer)) -(define-extern type type) -;;(define-extern format object) ;; unknown type -;;(define-extern uint128 object) ;; unknown type -(define-extern append! (function object object object)) -;;(define-extern *trace-list* object) ;; unknown type -(define-extern last (function object object)) -(define-extern printl function) -(define-extern valid? function) -;;(define-extern nothing object) ;; unknown type -(define-extern sort (function object (function object object object) object)) -;;(define-extern method-set! object) ;; unknown type -;;(define-extern *debug-segment* object) ;; unknown type -(define-extern bfloat type) - -(define-extern string type) -;;(define-extern #t object) ;; unknown type -;;(define-extern integer object) ;; unknown type -(define-extern uint32 type) -(define-extern float type) -;;(define-extern uint16 object) ;; unknown type -(define-extern int32 type) -(define-extern uint8 type) -;;(define-extern int128 object) ;; unknown type -(define-extern symbol type) -(define-extern object type) -;;(define-extern uint64 object) ;; unknown type -;;(define-extern number object) ;; unknown type -(define-extern name= (function basic basic symbol)) -;;(define-extern int64 object) ;; unknown type -;;(define-extern global object) ;; unknown type -;;(define-extern structure object) ;; unknown type -(define-extern int8 type) -;;(define-extern binteger object) ;; unknown type -;;(define-extern else object) ;; unknown type -(define-extern int16 type) - +;; KERNEL +(define-extern *kernel-packages* pair) 
;;(define-extern stack-frame object) ;; unknown type (define-extern state type) @@ -31053,7 +30999,7 @@ (define-extern search-process-tree function) (define-extern change-to-last-brother function) ;;(define-extern *pickup-dead-pool* object) ;; unknown type -;;(define-extern *kernel-packages* object) ;; unknown type + ;;(define-extern *camera-master-dead-pool* object) ;; unknown type (define-extern load-package (function string kheap pair)) (define-extern set-to-run function) diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index fe788addd8..c2b46e094f 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -11,6 +11,7 @@ "DGO/SNO.DGO", "DGO/SUB.DGO", "DGO/SUN.DGO", "CGO/SUNKEN.CGO", "DGO/SWA.DGO", "DGO/TIT.DGO", "DGO/TRA.DGO", "DGO/VI1.DGO", "DGO/VI2.DGO", "DGO/VI3.DGO", "CGO/VILLAGEP.CGO", "CGO/WATER-AN.CGO" ], + //"dgo_names":["CGO/KERNEL.CGO"], "object_file_names":["TEXT/0COMMON.TXT", "TEXT/1COMMON.TXT", "TEXT/2COMMON.TXT", "TEXT/3COMMON.TXT", "TEXT/4COMMON.TXT", "TEXT/5COMMON.TXT", "TEXT/6COMMON.TXT"], @@ -49,9 +50,11 @@ "STR/SAISA.STR","STR/SIHISC.STR","STR/MIIORBS.STR","STR/WAINTROD.STR","STR/SAISD2.STR","STR/GRSOPREB.STR", "STR/GRSOBBB.STR","STR/SA3INTRO.STR" ], + //"str_file_names":[], "analyze_functions":true, + "function_type_prop":false, "write_disassembly":true, "write_hex_near_instructions":false, @@ -477,9 +480,8 @@ "(anon-function 2 target-tube)", "(anon-function 5 orbit-plat)", "(anon-function 2 ogreboss)" + ], - - - - ] + "pair_functions_by_name":["ref", "last", "member", "nmember", "assoc", "assoce", "append!", "delete!", "delete-car!", + "insert-cons!", "sort", "unload-package", "(method 4 pair)", "nassoc", "nassoce"] } \ No newline at end of file diff --git a/decompiler/util/DecompilerTypeSystem.cpp b/decompiler/util/DecompilerTypeSystem.cpp index 867651dea5..f28326e3d8 100644 --- a/decompiler/util/DecompilerTypeSystem.cpp +++ 
b/decompiler/util/DecompilerTypeSystem.cpp @@ -3,6 +3,7 @@ #include "common/type_system/deftype.h" #include "decompiler/Disasm/Register.h" #include "third-party/spdlog/include/spdlog/spdlog.h" +#include "TP_Type.h" DecompilerTypeSystem::DecompilerTypeSystem() { ts.add_builtin_types(); @@ -149,35 +150,9 @@ void DecompilerTypeSystem::add_symbol(const std::string& name, const TypeSpec& t } } -std::string TP_Type::print() const { - switch (kind) { - case OBJECT_OF_TYPE: - return ts.print(); - case TYPE_OBJECT: - return fmt::format("[{}]", ts.print()); - case FALSE: - return fmt::format("[#f]"); - case NONE: - return fmt::format("[none]"); - default: - assert(false); - } -} - -std::string TypeState::print_gpr_masked(u32 mask) const { - std::string result; - for (int i = 0; i < 32; i++) { - if (mask & (1 << i)) { - result += Register(Reg::GPR, i).to_charp(); - result += ": "; - result += gpr_types[i].print(); - result += " "; - } - } - return result; -} - -TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) { +TP_Type DecompilerTypeSystem::tp_lca_no_simplify(const TP_Type& existing, + const TP_Type& add, + bool* changed) { switch (existing.kind) { case TP_Type::OBJECT_OF_TYPE: switch (add.kind) { @@ -185,7 +160,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add // two normal types, do LCA as normal. TP_Type result; result.kind = TP_Type::OBJECT_OF_TYPE; - result.ts = ts.lowest_common_ancestor(existing.ts, add.ts); + result.ts = ts.lowest_common_ancestor_reg(existing.ts, add.ts); *changed = (result.ts != existing.ts); return result; } @@ -193,7 +168,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add // normal, [type object]. Change type object to less specific "type".
TP_Type result; result.kind = TP_Type::OBJECT_OF_TYPE; - result.ts = ts.lowest_common_ancestor(existing.ts, ts.make_typespec("type")); + result.ts = ts.lowest_common_ancestor_reg(existing.ts, ts.make_typespec("type")); *changed = (result.ts != existing.ts); return result; } @@ -214,7 +189,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add case TP_Type::OBJECT_OF_TYPE: { TP_Type result; result.kind = TP_Type::OBJECT_OF_TYPE; - result.ts = ts.lowest_common_ancestor(ts.make_typespec("type"), add.ts); + result.ts = ts.lowest_common_ancestor_reg(ts.make_typespec("type"), add.ts); *changed = true; // changed type return result; } @@ -222,7 +197,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add // two type objects. TP_Type result; result.kind = TP_Type::TYPE_OBJECT; - result.ts = ts.lowest_common_ancestor(existing.ts, add.ts); + result.ts = ts.lowest_common_ancestor_reg(existing.ts, add.ts); *changed = (result.ts != existing.ts); return result; } @@ -261,6 +236,9 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add case TP_Type::OBJECT_OF_TYPE: case TP_Type::TYPE_OBJECT: case TP_Type::FALSE: + case TP_Type::METHOD_NEW_OF_OBJECT: + *changed = true; + return add; case TP_Type::NONE: *changed = false; return existing; @@ -268,11 +246,32 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add assert(false); } break; + case TP_Type::METHOD_NEW_OF_OBJECT: // existing is METHOD_NEW_OF_OBJECT: merges only with an equal one or with NONE + switch (add.kind) { + case TP_Type::METHOD_NEW_OF_OBJECT: { + if (existing.ts == add.ts) { + *changed = false; + return existing; + } else { + assert(false); // two different METHOD_NEW_OF_OBJECT types cannot be merged + } + } + case TP_Type::NONE: + *changed = false; + return existing; + default: + assert(false); + } // leave the outer switch like the sibling cases instead of falling into its default + break; default: assert(false); } } +TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed) { // merges the simplify()'d forms of both inputs + return tp_lca_no_simplify(existing.simplify(), add.simplify(), changed); +} + bool
DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) { bool result = false; for (int i = 0; i < 32; i++) { @@ -294,4 +293,4 @@ bool DecompilerTypeSystem::tp_lca(TypeState* combined, const TypeState& add) { } return result; -} \ No newline at end of file +} diff --git a/decompiler/util/DecompilerTypeSystem.h b/decompiler/util/DecompilerTypeSystem.h index 01f9e5d1a4..5a7e784ab0 100644 --- a/decompiler/util/DecompilerTypeSystem.h +++ b/decompiler/util/DecompilerTypeSystem.h @@ -2,20 +2,10 @@ #define JAK_DECOMPILERTYPESYSTEM_H #include "common/type_system/TypeSystem.h" +#include "decompiler/Disasm/Register.h" -struct TP_Type { - enum Kind { OBJECT_OF_TYPE, TYPE_OBJECT, FALSE, NONE } kind = NONE; - // in the case that we are type_object, just store the type name in a single arg ts. - TypeSpec ts; - std::string print() const; -}; - -struct TypeState { - TP_Type gpr_types[32]; - TP_Type fpr_types[32]; - - std::string print_gpr_masked(u32 mask) const; -}; +struct TP_Type; +struct TypeState; class DecompilerTypeSystem { public: @@ -46,7 +36,16 @@ class DecompilerTypeSystem { std::string lookup_parent_from_inspects(const std::string& child) const; bool lookup_flags(const std::string& type, u64* dest) const; TP_Type tp_lca(const TP_Type& existing, const TP_Type& add, bool* changed); + TP_Type tp_lca_no_simplify(const TP_Type& existing, const TP_Type& add, bool* changed); bool tp_lca(TypeState* combined, const TypeState& add); + struct { + bool allow_pair = false; // same value reset() assigns, so the flag is defined before the first reset() + std::string current_method_type; + void reset() { + allow_pair = false; + current_method_type.clear(); + } + } type_prop_settings; }; #endif // JAK_DECOMPILERTYPESYSTEM_H diff --git a/decompiler/util/TP_Type.cpp b/decompiler/util/TP_Type.cpp new file mode 100644 index 0000000000..b4bfc70f39 --- /dev/null +++ b/decompiler/util/TP_Type.cpp @@ -0,0 +1,55 @@ +#include "TP_Type.h" +#include "third-party/fmt/core.h" + +/*! + * Takes the weird TP_Types and converts them to one of the main 4.
+ * This is supposed to be used if the fancy type analysis steps are attempted but fail. + */ +TP_Type TP_Type::simplify() const { + switch (kind) { + case PRODUCT: + return TP_Type(ts); + case METHOD_NEW_OF_OBJECT: + return TP_Type(ts); + case OBJ_PLUS_PRODUCT: + return TP_Type(TypeSpec("none")); + default: + return *this; + } +} + +std::string TP_Type::print() const { + switch (kind) { + case OBJECT_OF_TYPE: + return ts.print(); + case TYPE_OBJECT: + return fmt::format("[{}]", ts.print()); + case FALSE: + return fmt::format("[#f]"); + case NONE: + return fmt::format("[none]"); + case PRODUCT: + return fmt::format("[{} x {}]", ts.print(), multiplier); + case PARTIAL_METHOD_TABLE_ACCESS: + return fmt::format("[[vtable-access]]"); + case METHOD_NEW_OF_OBJECT: + return fmt::format("[(method object new)]"); + case OBJ_PLUS_PRODUCT: + return fmt::format("[{} + int x {}]", ts.print(), multiplier); + default: + assert(false); + } +} + +std::string TypeState::print_gpr_masked(u32 mask) const { + std::string result; + for (int i = 0; i < 32; i++) { + if (mask & (1u << i)) { // unsigned literal: bit 31 is tested without a signed shift + result += Register(Reg::GPR, i).to_charp(); + result += ": "; + result += gpr_types[i].print(); + result += " "; + } + } + return result; +} \ No newline at end of file diff --git a/decompiler/util/TP_Type.h b/decompiler/util/TP_Type.h new file mode 100644 index 0000000000..05aefe15e0 --- /dev/null +++ b/decompiler/util/TP_Type.h @@ -0,0 +1,92 @@ +#pragma once +#include <string> +#include <cassert> +#include "common/type_system/TypeSpec.h" +#include "common/common_types.h" +#include "decompiler/Disasm/Register.h" + +struct TP_Type { + enum Kind { + OBJECT_OF_TYPE, + TYPE_OBJECT, + FALSE, + NONE, + PRODUCT, + OBJ_PLUS_PRODUCT, + PARTIAL_METHOD_TABLE_ACCESS,  // type + method_number * 4 + METHOD_NEW_OF_OBJECT + } kind = NONE; + // in the case that we are type_object, just store the type name in a single arg ts.
+ TypeSpec ts; + int multiplier = 0; // factor printed for PRODUCT and OBJ_PLUS_PRODUCT; initialized so a default TP_Type holds no indeterminate value + + TP_Type() = default; + explicit TP_Type(const TypeSpec& _ts) { + kind = OBJECT_OF_TYPE; + ts = _ts; + } + + TP_Type simplify() const; + std::string print() const; + + bool is_object_of_type() const { return kind == TYPE_OBJECT || ts == TypeSpec("type"); } + + TypeSpec as_typespec() const { + switch (kind) { + case OBJECT_OF_TYPE: + return ts; + case TYPE_OBJECT: + return TypeSpec("type"); + case FALSE: + return TypeSpec("symbol"); + case NONE: + return TypeSpec("none"); + case PRODUCT: + case METHOD_NEW_OF_OBJECT: + return ts; + default: + assert(false); // OBJ_PLUS_PRODUCT and PARTIAL_METHOD_TABLE_ACCESS have no TypeSpec form here + } + } + + static TP_Type make_partial_method_table_access() { + TP_Type result; + result.kind = PARTIAL_METHOD_TABLE_ACCESS; + return result; + } + + static TP_Type make_type_object(const std::string& name) { + TP_Type result; + result.kind = TYPE_OBJECT; + result.ts = TypeSpec(name); + return result; + } +}; + +struct TypeState { + TP_Type gpr_types[32]; + TP_Type fpr_types[32]; + + std::string print_gpr_masked(u32 mask) const; + TP_Type& get(const Register& r) { + switch (r.get_kind()) { + case Reg::GPR: + return gpr_types[r.get_gpr()]; + case Reg::FPR: + return fpr_types[r.get_fpr()]; + default: + assert(false); + } + } + + const TP_Type& get(const Register& r) const { + switch (r.get_kind()) { + case Reg::GPR: + return gpr_types[r.get_gpr()]; + case Reg::FPR: + return fpr_types[r.get_fpr()]; + default: + assert(false); + } + } +}; \ No newline at end of file diff --git a/goal_src/kernel/gcommon.gc b/goal_src/kernel/gcommon.gc index 4331a8d4cd..c79af2ade1 100644 --- a/goal_src/kernel/gcommon.gc +++ b/goal_src/kernel/gcommon.gc @@ -10,44 +10,23 @@ -;; The "identity" returns its input unchanged. It uses the special GOAL "object" -;; type, which can basically be anything, so this will work on integers, floats, -;; strings, structures, arrays, etc. The only things which doesn't work with "object" -;; is a 128-bit integer. The upper 64-bits of the integer will usually be lost.
(defun identity ((x object)) - ;; there is an optional "docstring" that can go at the beginning of a function "Function which returns its input. The first function of the game!" - - ;; the last thing in the function body is the return value. This is like "return x;" in C - ;; the return type of the function is figured out automatically by the compiler - ;; you don't have to specify it manually. x ) (defun 1/ ((x float)) "Reciprocal floating point" - - ;; this function computes 1.0 / x. GOAL allows strange function names like "1/". - - ;; Declaring this an inline function is like a C inline function, however code is - ;; still generated so it can be used a function object. GOAL inline functions have type - ;; checking, so they are preferable to macros when possible, to get better error messages. + ;; likely inlined? nothing calls this. (declare (inline)) - - ;; the division form will pick the math type (float, int) based on the type of the first - ;; argument. In this case, "1." is a floating point constant, so this becomes a floating point division. (/ 1. x) ) +;; these next 4 functions are just function wrappers around the built-in add/subtract/multiply/divide. +;; this will let you use + as an operation on integers and also as a function pointer. (defun + ((x int) (y int)) "Compute the sum of two integers" - - ;; this wraps the compiler's built-in handling of "add two integers" in a GOAL function. - ;; now "+" can be used as a function object, but is limited to adding two integers when used like this. - ;; The compiler is smart enough to not use this function unless "+" is being used as a function object. - ;; ex: (+ a b c), (+ a b) ; won't use this function, uses built-in addition - ;; (set-combination-function! my-thing +) ; + becomes a function pointer in this case (+ x y) ) @@ -206,63 +185,24 @@ ;; or 128-bit arguments (unimplemented in C Kernel), but both of these were never finished.
(define format _format) -;; TODO - vec4s - -;; The "boxed float" type "bfloat" is just a float wrapped in a basic (structure type that has runtime type information) -;; it's a way to have a floating point number that knows its a floating point number and can print/inspect itself -;; Compared to a normal float, it's much less efficient, so this is used extremely rarely. - -;; a GOAL deftype contains the following: -;; - type name -;; - parent type name -;; - field list -;; - method declarations -;; - additional options -;; It has "asserts" that can be used to make sure that the type is laid out in memory in the same way as the game. -;; You provide the actual offsets/sizes/method ids, and if there is a mismatch, it throws a compiler error. -;; The decompile will generate these automatically in the future. - -;; Type Name: should be a unique name. Can't be the name of a function or global variable. In this case, it's bfloat -;; Parent Type: Should be the name of the parent type ("basic" in this case). Will inherit fields and methods from the parent. -;; children of "basic" are structure types with runtime type information. -;; Field List: each field of the type, listed as (name type-name [options]) -;; use the :offset-assert X to do a check at comile-time that the OpenGOAL compiler places the field at the given offset. -;; if the compiler came up with a different offset, it will create an error. This used to make sure the memory layout matches -;; the original game. -;; Method Declarations: Any methods which are defined in this type but not the parent must be declared here. -;; you may optionally declare methods defined only in the parent, or defined in both the parent and child (overridden methods) -;; the method declarations is (method-name (arg-list) return-type [optional-id-assert]) -;; the optional id assert is used to check that the compiler places the method in the given slot of the method table. 
-;; like the offset-assert, it's used to make sure the type hierarchy matches the game. -;; Note that the special type "_type_" can be used in methods args/returns to indicate "the type of the object method is called on". -;; this is used for 2 things: -;; 1. Child who overrides it can use their own type as an argument, rather than a less specific parent type. -;; 2. Caller who calls an overriden method and knows it at compile time can know a return type more specifically. +;; vec4s - this is present in the game as a 128-bit integer child type full of 4 floats. +;; this doesn't seem to be used, and OpenGOAL doesn't support bitfields or 128-bit integers yet, so it is omitted. +;; I suspect this was unused because putting 4 floats in a 128-bit integer register is not an incredibly useful thing to do +;; - accessing all of these floats will be very slow. (deftype bfloat (basic) - ;; fields - ((data float :offset-assert 4)) ;; field "data" is a float. - ;; methods - (:methods (print (_type_) _type_ 2) ;; we will override print later on. This is optional to include - (inspect (_type_) _type_ 3) ;; this is a parent method we won't override. This is also optional to inlcude - ) - - ;; options - - ;; make sure the size of the type is correct (compare to value from game) + ((data float :offset-assert 4)) + (:methods + (print (_type_) _type_ 2) ;; we will override print later on. This is optional to include + (inspect (_type_) _type_ 3) ;; this is a parent method we won't override. This is also optional to include + ) + :size-assert 8 - ;; make sure method count is correct (again, compare to value from game) :method-count-assert 9 - ;; flags passed to the new_type function in the runtime, compare from game :flag-assert #x900000008 ) - -;; The "print" method of a type should print out a single line representation of the object.
-;; The default print method for a basic will be something like # -;; This is used when printing an object with format, using the "~A" format specification. -;; And of course in functions like print, printl. (defmethod print bfloat ((obj bfloat)) "Override the default print method to print a bfloat like a normal float" (format #t "~f" (-> obj data)) @@ -284,13 +224,15 @@ ;; A "type" object contains some basic information about a type as well as the list of methods. ;; Some types have more methods than others, so the method table makes "type" a dynamic type. -;; As a result, we should define an "asize-of" method for type. It's possibly unused because it's wrong. +;; As a result, we should define an "asize-of" method for type. It's possibly unused and it's wrong. (defmethod asize-of type ((obj type)) "Get the size in memory of a type" ;; The 28 is 8 bytes too large. It's also strange that types have a 16-byte aligned size always, ;; but this matches what the runtime does as well. There's no reason that I can see for this, ;; as other basics don't require 16-byte aligned sizes. + ;; - this was perhaps accurate back when types were inside of the symbol table? Or before switching to u16's + ;; for some of the type values? (align16 (+ 28 (* 4 (-> type allocated-length)))) ) @@ -621,7 +563,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; memcpy and similar ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(defun mem-copy! ((dst pointer) (src pointer) (size integer)) +(defun mem-copy! ((dst pointer) (src pointer) (size int)) "Copy memory from src to dst. Size is in bytes. This is not an efficient implementation, however, there are _no restrictions_ on size, alignment etc. Increasing address copy." (let ((i 0)