Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Type Prop - Second Attempt #142

Merged
merged 7 commits into from
Nov 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions common/type_system/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,21 @@ bool Type::get_my_method(const std::string& name, MethodInfo* out) const {
return false;
}

/*!
* Get a method that is defined specifically in this type by id. Returns if it was found or not.
*/
bool Type::get_my_method(int id, MethodInfo* out) const {
assert(id > 0); // 0 is new, should use explicit new method functions instead.
for (auto& x : m_methods) {
if (x.id == id) {
*out = x;
return true;
}
}

return false;
}

/*!
* Get the last method defined specifically for this type. Returns if there were any methods
* defined specifically for this type or not.
Expand Down
1 change: 1 addition & 0 deletions common/type_system/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class Type {
std::string get_parent() const;
void set_runtime_type(std::string name);
bool get_my_method(const std::string& name, MethodInfo* out) const;
bool get_my_method(int id, MethodInfo* out) const;
bool get_my_last_method(MethodInfo* out) const;
bool get_my_new_method(MethodInfo* out) const;
const MethodInfo& add_method(const MethodInfo& info);
Expand Down
54 changes: 51 additions & 3 deletions common/type_system/TypeSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,40 @@ MethodInfo TypeSystem::lookup_method(const std::string& type_name, const std::st
throw std::runtime_error("lookup_method failed");
}

/*!
 * Find a method by its ID number. The given type is checked first, then each parent type in
 * turn until one of them defines the method. The "new" method ID is forwarded to
 * lookup_new_method. Prints a message and throws std::runtime_error if no type in the chain
 * defines the method.
 */
MethodInfo TypeSystem::lookup_method(const std::string& type_name, int method_id) {
  if (method_id == GOAL_NEW_METHOD) {
    return lookup_new_method(type_name);
  }

  MethodInfo found;

  // start at the requested type and climb towards the root of the hierarchy
  auto* current = lookup_type(type_name);
  while (!current->get_my_method(method_id, &found)) {
    if (!current->has_parent()) {
      // ran out of ancestors without finding the method.
      fmt::print("[TypeSystem] The method with id {} of type {} could not be found.\n", method_id,
                 type_name);
      throw std::runtime_error("lookup_method failed");
    }
    current = lookup_type(current->get_parent());
  }

  return found;
}

/*!
* Lookup information on a new method and get the most specialized version.
*/
Expand Down Expand Up @@ -715,7 +749,14 @@ void TypeSystem::add_builtin_types() {
add_field_to_type(kheap_type, "top-base", make_typespec("pointer"));

// todo
(void)array_type;
builtin_structure_inherit(array_type);
add_method(array_type, "new",
make_function_typespec({"symbol", "type", "type", "int"}, "_type_"));
// array has: number, number, type
add_field_to_type(array_type, "length", make_typespec("int32"));
add_field_to_type(array_type, "allocated-length", make_typespec("int32"));
add_field_to_type(array_type, "content-type", make_typespec("type"));
add_field_to_type(array_type, "data", make_typespec("uint8"), false, true);

// pair
pair_type->override_offset(2);
Expand Down Expand Up @@ -1064,6 +1105,10 @@ TypeSpec TypeSystem::lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b
return result;
}

/*!
 * Lowest common ancestor of two types, with the result passed through coerce_to_reg_type.
 */
TypeSpec TypeSystem::lowest_common_ancestor_reg(const TypeSpec& a, const TypeSpec& b) {
  const TypeSpec common = lowest_common_ancestor(a, b);
  return coerce_to_reg_type(common);
}

/*!
* Lowest common ancestor of multiple (or at least one) type.
*/
Expand All @@ -1083,12 +1128,12 @@ TypeSpec TypeSystem::lowest_common_ancestor(const std::vector<TypeSpec>& types)
TypeSpec coerce_to_reg_type(const TypeSpec& in) {
if (in.arg_count() == 0) {
if (in.base_type() == "int8" || in.base_type() == "int16" || in.base_type() == "int32" ||
in.base_type() == "int64") {
in.base_type() == "int64" || in.base_type() == "integer") {
return TypeSpec("int");
}

if (in.base_type() == "uint8" || in.base_type() == "uint16" || in.base_type() == "uint32" ||
in.base_type() == "uint64") {
in.base_type() == "uint64" || in.base_type() == "uinteger") {
return TypeSpec("uint");
}
}
Expand Down Expand Up @@ -1133,6 +1178,9 @@ bool TypeSystem::reverse_deref(const ReverseDerefInputInfo& input,
token.kind = ReverseDerefInfo::DerefToken::INDEX;
token.index = closest_index;

if (!di.mem_deref) {
return false;
}
assert(di.mem_deref);
if (offset_into_elt == 0) {
if (input.mem_deref) {
Expand Down
2 changes: 2 additions & 0 deletions common/type_system/TypeSystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class TypeSystem {
bool allow_new_method = true);
MethodInfo add_new_method(Type* type, const TypeSpec& ts);
MethodInfo lookup_method(const std::string& type_name, const std::string& method_name);
MethodInfo lookup_method(const std::string& type_name, int method_id);
MethodInfo lookup_new_method(const std::string& type_name);
void assert_method_id(const std::string& type_name, const std::string& method_name, int id);

Expand Down Expand Up @@ -135,6 +136,7 @@ class TypeSystem {
}

TypeSpec lowest_common_ancestor(const TypeSpec& a, const TypeSpec& b);
TypeSpec lowest_common_ancestor_reg(const TypeSpec& a, const TypeSpec& b);
TypeSpec lowest_common_ancestor(const std::vector<TypeSpec>& types);

private:
Expand Down
5 changes: 4 additions & 1 deletion decompiler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ add_executable(decompiler
data/tpage.cpp
data/game_text.cpp
data/StrFileReader.cpp
data/game_count.cpp data/LinkedWordReader.h)
data/game_count.cpp
Function/TypeAnalysis.cpp
IR/IR_TypeAnalysis.cpp
util/TP_Type.cpp)

target_link_libraries(decompiler
goos
Expand Down
6 changes: 6 additions & 0 deletions decompiler/Function/BasicBlocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "CfgVtx.h"
#include "decompiler/util/DecompilerTypeSystem.h"
#include "decompiler/util/TP_Type.h"

class LinkedObjectFile;
class Function;
Expand All @@ -26,6 +27,11 @@ struct BasicBlock {
BasicBlock(int _start_word, int _end_word) : start_word(_start_word), end_word(_end_word) {}
};

// Result of ordering a function's basic blocks (produced by Function::bb_topo_sort).
struct BlockTopologicalSort {
// Basic block indices, in the order they were visited.
std::vector<int> vist_order;
// Indices of basic blocks that were never reached and so do not appear in vist_order.
std::unordered_set<int> unreachable;
};

std::vector<BasicBlock> find_blocks_in_function(const LinkedObjectFile& file,
int seg,
const Function& func);
43 changes: 39 additions & 4 deletions decompiler/Function/Function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -683,10 +683,6 @@ std::shared_ptr<IR_Atomic> Function::get_basic_op_at_instr(int idx) {
return basic_ops.at(instruction_to_basic_op.at(idx));
}

const TypeMap& Function::get_typemap_by_instr_idx(int idx) {
return basic_op_typemaps.at(instruction_to_basic_op.at(idx));
}

/*!
 * Number of basic ops currently stored for this function.
 */
int Function::get_basic_op_count() {
  const auto op_count = basic_ops.size();
  return static_cast<int>(op_count);
}
Expand All @@ -709,4 +705,43 @@ int Function::get_reginfo_basic_op_count() {
}
}
return count;
}

/*!
 * Compute a visit order for the basic blocks, starting from block 0.
 * Blocks are discovered by following the succ_branch and succ_ft edges, always visiting the most
 * recently discovered block next. Note that this is a depth-first discovery order, not a strict
 * topological order: a block may be visited before all of its predecessors.
 * Returns the ordering + the set of blocks that can't be reached from block 0 and therefore
 * aren't in the ordering. Each reachable block appears in the ordering exactly once.
 */
BlockTopologicalSort Function::bb_topo_sort() {
  BlockTopologicalSort result;
  std::unordered_set<int> visit_set;  // blocks that have been discovered (queued or visited)
  std::vector<int> visit_queue;       // used as a stack: most recently discovered goes first
  assert(!basic_blocks.empty());

  // The entry block must be marked as discovered up front. Otherwise a back-edge to block 0
  // would queue it a second time, and with no such back-edge block 0 would be reported as
  // unreachable even though it is the first entry of the ordering.
  visit_set.insert(0);
  visit_queue.push_back(0);

  while (!visit_queue.empty()) {
    // let's visit the most recently added:
    const auto to_visit = visit_queue.back();
    visit_queue.pop_back();
    result.vist_order.push_back(to_visit);

    const auto& block = basic_blocks.at(to_visit);
    for (auto next : {block.succ_branch, block.succ_ft}) {
      // -1 means "no successor". insert(...).second is true only on first discovery.
      if (next != -1 && visit_set.insert(next).second) {
        visit_queue.push_back(next);
      }
    }
  }

  // everything that was never discovered is unreachable from the entry block.
  for (int i = 0; i < int(basic_blocks.size()); i++) {
    if (visit_set.find(i) == visit_set.end()) {
      result.unreachable.insert(i);
    }
  }

  return result;
}
15 changes: 4 additions & 11 deletions decompiler/Function/Function.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include "common/type_system/TypeSpec.h"

class DecompilerTypeSystem;
// Map of what type is in each register.
using TypeMap = std::unordered_map<Register, TypeSpec, Register::hash>;
class IR_Atomic;
class IR;

Expand Down Expand Up @@ -65,11 +63,6 @@ struct FunctionName {
}
};

class BasicOpTypeInfo {
public:
std::unordered_map<Register, TypeSpec> all_reg_types;
};

class Function {
public:
Function(int _start_word, int _end_word);
Expand All @@ -79,16 +72,15 @@ class Function {
void find_type_defs(LinkedObjectFile& file, DecompilerTypeSystem& dts);
void add_basic_op(std::shared_ptr<IR_Atomic> op, int start_instr, int end_instr);
bool has_basic_ops() { return !basic_ops.empty(); }
bool has_typemaps() { return !basic_op_typemaps.empty(); }
bool instr_starts_basic_op(int idx);
std::shared_ptr<IR_Atomic> get_basic_op_at_instr(int idx);
const TypeMap& get_typemap_by_instr_idx(int idx);
int get_basic_op_count();
int get_failed_basic_op_count();
int get_reginfo_basic_op_count();
void run_type_analysis(const TypeSpec& my_type,
bool run_type_analysis(const TypeSpec& my_type,
DecompilerTypeSystem& dts,
LinkedObjectFile& file);
BlockTopologicalSort bb_topo_sort();

TypeSpec type;

Expand Down Expand Up @@ -117,6 +109,8 @@ class Function {
std::string warnings;
bool contains_asm_ops = false;

bool attempted_type_analysis = false;

struct Prologue {
bool decoded = false; // have we removed the prologue from basic blocks?
int total_stack_usage = -1;
Expand Down Expand Up @@ -150,7 +144,6 @@ class Function {

private:
void check_epilogue(const LinkedObjectFile& file);
std::vector<TypeMap> basic_op_typemaps;
std::unordered_map<int, int> instruction_to_basic_op;
std::unordered_map<int, int> basic_op_to_instruction;
};
Expand Down
101 changes: 101 additions & 0 deletions decompiler/Function/TypeAnalysis.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#include "TypeAnalysis.h"
#include "decompiler/IR/IR.h"
#include "third-party/fmt/core.h"
#include "decompiler/config.h"

namespace {
/*!
 * Build the register type state at function entry from the function's typespec.
 * The typespec lists the argument types followed by the return type, so every entry except the
 * last is an argument. Argument i is placed in the i-th argument register
 * (a0, a1, a2, a3, t0, t1, t2, t3) as an OBJECT_OF_TYPE.
 */
TypeState construct_initial_typestate(const TypeSpec& f_ts) {
  TypeState result;
  constexpr int kMaxArgs = 8;
  const int goal_args[kMaxArgs] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3,
                                   Reg::T0, Reg::T1, Reg::T2, Reg::T3};
  assert(f_ts.base_type() == "function");
  assert(f_ts.arg_count() >= 1);             // at least the return type
  assert(f_ts.arg_count() <= kMaxArgs + 1);  // up to 8 arguments + 1 return type

  // the final entry is the return type, which does not live in an argument register.
  const int num_args = int(f_ts.arg_count()) - 1;
  for (int i = 0; i < num_args; i++) {
    const auto reg_id = goal_args[i];
    auto& reg_info = result.gpr_types[reg_id];
    reg_info.ts = f_ts.get_arg(i);
    reg_info.kind = TP_Type::OBJECT_OF_TYPE;
  }
  return result;
}
} // namespace

/*!
 * Run type propagation over this function.
 * Starting from the argument types in my_type, types are pushed through every basic op of every
 * reachable basic block. At the end of a block the resulting types are merged (via dts.tp_lca)
 * into the initial types of each successor block. This repeats until a full pass changes nothing.
 * Blocks that are not in the visit order (unreachable from block 0) are not processed.
 *
 * @param my_type the function typespec of this function (argument types, then return type)
 * @param dts     the decompiler type system; its type_prop_settings are reset and reconfigured
 * @param file    the object file, forwarded to each op's propagate_types
 * @return false if propagate_types threw std::runtime_error for some op (a warning is appended
 *         to this function's warnings), true otherwise. A return type mismatch only adds a
 *         warning and still returns true.
 */
bool Function::run_type_analysis(const TypeSpec& my_type,
                                 DecompilerTypeSystem& dts,
                                 LinkedObjectFile& file) {
  // STEP 0 - setup settings
  dts.type_prop_settings.reset();
  if (get_config().pair_functions_by_name.find(guessed_name.to_string()) !=
      get_config().pair_functions_by_name.end()) {
    dts.type_prop_settings.allow_pair = true;
  }

  if (guessed_name.kind == FunctionName::FunctionKind::METHOD) {
    dts.type_prop_settings.current_method_type = guessed_name.type_name;
  }

  // STEP 1 - get the topo sort.
  auto order = bb_topo_sort();
  //  fmt::print("blocks: {}\n  ", basic_blocks.size());
  //  for (auto x : order.vist_order) {
  //    fmt::print("{} ", x);
  //  }
  //  fmt::print("\n");

  // STEP 2 - establish visit order
  assert(!order.vist_order.empty());
  assert(order.vist_order.front() == 0);

  // STEP 3 - initialize type state.
  basic_blocks.at(0).init_types = construct_initial_typestate(my_type);

  // STEP 4 - loop while types are changing
  bool run_again = true;
  while (run_again) {
    run_again = false;
    // each block in order now.
    for (auto block_id : order.vist_order) {
      auto& block = basic_blocks.at(block_id);
      // types going into the next op: the block's initial types, then each op's end types.
      TypeState* init_types = &block.init_types;
      for (int op_id = block.start_basic_op; op_id < block.end_basic_op; op_id++) {
        auto& op = basic_ops.at(op_id);

        // while the implementation of propagate_types is in progress, it may throw
        // for unimplemented cases.  Eventually this try/catch should be removed.
        try {
          op->propagate_types(*init_types, file, dts);
        } catch (const std::runtime_error& e) {
          fmt::print("Type prop fail: {}\n\n\n", e.what());
          warnings += "Type prop attempted and failed.  ";
          return false;
        }

        // todo, set run again??

        // for the next op...
        init_types = &op->end_types;
      }

      // propagate the types: for each possible succ
      for (auto succ_block_id : {block.succ_ft, block.succ_branch}) {
        if (succ_block_id != -1) {
          auto& succ_block = basic_blocks.at(succ_block_id);
          // set types to LCA (current, new)
          if (dts.tp_lca(&succ_block.init_types, *init_types)) {
            // if something changed, run again!
            run_again = true;
          }
        }
      }
    }
  }

  // STEP 5 - compare the type left in v0 by the last op against the declared return type.
  // Calling back() on an empty vector is undefined behaviour, so skip the check if there are
  // no basic ops to inspect.
  if (!basic_ops.empty()) {
    const auto& last_op = basic_ops.back();
    auto last_type = last_op->end_types.get(Register(Reg::GPR, Reg::V0)).as_typespec();
    if (last_type != my_type.last_arg()) {
      warnings += fmt::format("return type mismatch {} vs {}.  ", last_type.print(),
                              my_type.last_arg().print());
    }
  }

  return true;
}
2 changes: 2 additions & 0 deletions decompiler/Function/TypeAnalysis.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#pragma once
#include "Function.h"
Loading