From de1164af72fcc576a5f084b6b6c4838311bf9cfa Mon Sep 17 00:00:00 2001 From: water Date: Tue, 2 Feb 2021 20:42:20 -0500 Subject: [PATCH 01/10] fix shift naming issue --- common/util/FileUtil.cpp | 2 +- common/util/FileUtil.h | 2 +- common/versions.h | 2 +- decompiler/IR2/Form.cpp | 8 +-- decompiler/IR2/FormExpressionAnalysis.cpp | 8 +-- decompiler/IR2/IR2_common.h | 4 +- doc/changelog.md | 5 +- goal_src/engine/math/math.gc | 12 ++-- goal_src/kernel/gcommon.gc | 4 +- goalc/compiler/Compiler.h | 3 - goalc/compiler/compilation/Atoms.cpp | 3 - goalc/compiler/compilation/Math.cpp | 69 ++++++++----------- test/decompiler/test_FormExpressionBuild.cpp | 8 +-- .../test_FormExpressionBuildLong.cpp | 4 +- .../source_templates/arithmetic/ash.static.gc | 4 +- .../arithmetic/shiftvs.static.gc | 7 +- tools/dgo_unpacker.cpp | 2 +- 17 files changed, 70 insertions(+), 77 deletions(-) diff --git a/common/util/FileUtil.cpp b/common/util/FileUtil.cpp index a3df4e22b6..34bd460338 100644 --- a/common/util/FileUtil.cpp +++ b/common/util/FileUtil.cpp @@ -70,7 +70,7 @@ bool create_dir_if_needed(const std::string& path) { return false; } -void write_binary_file(const std::string& name, void* data, size_t size) { +void write_binary_file(const std::string& name, const void* data, size_t size) { FILE* fp = fopen(name.c_str(), "wb"); if (!fp) { throw std::runtime_error("couldn't open file " + name); diff --git a/common/util/FileUtil.h b/common/util/FileUtil.h index eb612d0fd8..946f753126 100644 --- a/common/util/FileUtil.h +++ b/common/util/FileUtil.h @@ -12,7 +12,7 @@ namespace file_util { std::string get_project_path(); std::string get_file_path(const std::vector& input); bool create_dir_if_needed(const std::string& path); -void write_binary_file(const std::string& name, void* data, size_t size); +void write_binary_file(const std::string& name, const void* data, size_t size); void write_rgba_png(const std::string& name, void* data, int w, int h); void write_text_file(const std::string& file_name, const std::string& text); std::vector read_binary_file(const std::string& filename); diff --git a/common/versions.h b/common/versions.h index d415879b4f..76d3016102 100644 --- a/common/versions.h +++ b/common/versions.h @@ -13,7 +13,7 @@ namespace versions { // language version (OpenGOAL) constexpr s32 GOAL_VERSION_MAJOR = 0; -constexpr s32 GOAL_VERSION_MINOR = 5; +constexpr s32 GOAL_VERSION_MINOR = 6; // these versions are from the game constexpr u32 ART_FILE_VERSION = 6; diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index 3a229fe08b..68536e0e2a 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -1067,10 +1067,10 @@ std::string fixed_operator_to_string(FixedOperatorKind kind) { return "lognor"; case FixedOperatorKind::LOGNOT: return "lognot"; - case FixedOperatorKind::SLL: - return "sll"; - case FixedOperatorKind::SRL: - return "srl"; + case FixedOperatorKind::SHL: + return "shl"; + case FixedOperatorKind::SHR: + return "shr"; case FixedOperatorKind::CAR: return "car"; case FixedOperatorKind::CDR: diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index 5a415f09f6..52530c58a8 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -426,10 +426,10 @@ void SimpleExpressionElement::update_from_stack(const Env& env, update_from_stack_lognot(env, pool, stack, result); break; case SimpleExpression::Kind::LEFT_SHIFT: - update_from_stack_force_ui_2(env, FixedOperatorKind::SLL, pool, stack, result); + update_from_stack_force_ui_2(env, FixedOperatorKind::SHL, pool, stack, result); break; case SimpleExpression::Kind::RIGHT_SHIFT_LOGIC: - update_from_stack_force_ui_2(env, FixedOperatorKind::SRL, pool, stack, result); + update_from_stack_force_ui_2(env, FixedOperatorKind::SHR, pool, stack, result); break; case SimpleExpression::Kind::MUL_UNSIGNED: update_from_stack_force_ui_2(env, FixedOperatorKind::MULTIPLICATION, pool, stack, result); @@ -979,7 +979,7 @@ void DynamicMethodAccess::update_from_stack(const Env& env, Matcher::match_or({Matcher::any_reg(1), Matcher::cast("int", Matcher::any_reg(1))}); // (+ (sll (the-as uint a1-0) 2) (the-as int a0-0)) - auto sll_matcher = Matcher::fixed_op(FixedOperatorKind::SLL, {reg0_matcher, Matcher::integer(2)}); + auto sll_matcher = Matcher::fixed_op(FixedOperatorKind::SHL, {reg0_matcher, Matcher::integer(2)}); auto matcher = Matcher::fixed_op(FixedOperatorKind::ADDITION, {sll_matcher, reg1_matcher}); auto match_result = match(matcher, new_val); if (!match_result.matched) { @@ -1052,7 +1052,7 @@ void ArrayFieldAccess::update_from_stack(const Env& env, auto reg1_matcher = Matcher::match_or({Matcher::any_reg(1), Matcher::cast("int", Matcher::any_reg(1))}); auto sll_matcher = - Matcher::fixed_op(FixedOperatorKind::SLL, {reg0_matcher, Matcher::integer(power_of_two)}); + Matcher::fixed_op(FixedOperatorKind::SHL, {reg0_matcher, Matcher::integer(power_of_two)}); auto matcher = Matcher::fixed_op(FixedOperatorKind::ADDITION, {sll_matcher, reg1_matcher}); auto match_result = match(matcher, new_val); if (!match_result.matched) { diff --git a/decompiler/IR2/IR2_common.h b/decompiler/IR2/IR2_common.h index 66b6333401..dfcb7b5302 100644 --- a/decompiler/IR2/IR2_common.h +++ b/decompiler/IR2/IR2_common.h @@ -105,8 +105,8 @@ enum class FixedOperatorKind { LOGXOR, LOGNOR, LOGNOT, - SLL, - SRL, + SHL, + SHR, CAR, CDR, NEW, diff --git a/doc/changelog.md b/doc/changelog.md index 70ee70e46a..de8b5344c7 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -107,4 +107,7 @@ - Accessing a constant field of an array now constant propagates the memory offset like field access and avoids a runtime multiply. - Fixed a bug where loading or storing a `vf` register from a memory location + constant offset would cause the compiler to throw an error. - Accessing array elements uses more efficient indexing for power-of-two element sizes. -- Added a `local-vars` form for declaring a bunch of local variables for the decompiler. \ No newline at end of file +- Added a `local-vars` form for declaring a bunch of local variables for the decompiler. + +## V0.6 +- There is no longer a separate compiler form for variable vs. constant shifts. Instead the compiler will pick the constant shift automatically when possible. The shifts are called `sar`, `shl` and `shr`, like the x86 instructions. \ No newline at end of file diff --git a/goal_src/engine/math/math.gc b/goal_src/engine/math/math.gc index afe47eef51..12ab483c4b 100644 --- a/goal_src/engine/math/math.gc +++ b/goal_src/engine/math/math.gc @@ -47,7 +47,7 @@ (defun log2 ((x int)) "Straight out of Bit Twiddling Hacks graphics.stanford.edu" - (- (sarv (the-as integer (the float x)) 23) 127) + (- (sar (the-as integer (the float x)) 23) 127) ) (defun seek ((x float) (target float) (diff float)) @@ -139,7 +139,7 @@ (set! (-> *random-generator* seed) #x666EDD1E) (defmacro sext32-64 (x) - `(sarv (shlv ,x 32) 32) + `(sar (shl ,x 32) 32) ) (defun rand-uint31-gen ((gen random-generator)) @@ -152,17 +152,17 @@ ;; mult3 v0, v1, a1 (prod (imul64 16807 sd)) ;; mfhi v1 - (hi (shrv prod 32)) ;; sign extend this? - (lo (sarv (shlv prod 32) 32)) + (hi (shr prod 32)) ;; sign extend this? + (lo (sar (shl prod 32) 32)) ;; daddu v1, v1, v1 (v1 (+ hi hi)) ;; srl a1, v0, 31 - (a1 (logand #xffffffff (shrv lo 31))) + (a1 (logand #xffffffff (shr lo 31))) ;; or v1, v1, a1 ;; daddu v0, v0 v1 (result (+ lo (logior v1 a1))) ) - (set! result (shrv (logand #xffffffff (shlv result 1)) 1)) + (set! result (shr (logand #xffffffff (shl result 1)) 1)) (set! (-> gen seed) result) result ) diff --git a/goal_src/kernel/gcommon.gc b/goal_src/kernel/gcommon.gc index fcaf8d7ed5..eeb5cb8f7d 100644 --- a/goal_src/kernel/gcommon.gc +++ b/goal_src/kernel/gcommon.gc @@ -71,8 +71,8 @@ ;; these correspond to x86-64 variable shift instructions. ;; the exact behavior of GOAL shifts (signed/unsigned) are unknown so for now shifts must ;; be manually specified. - (shlv value shift-amount) - (sarv value (- shift-amount)) + (shl value shift-amount) + (sar value (- shift-amount)) ) ) diff --git a/goalc/compiler/Compiler.h b/goalc/compiler/Compiler.h index 19ecccb8d2..8b033ebbea 100644 --- a/goalc/compiler/Compiler.h +++ b/goalc/compiler/Compiler.h @@ -354,9 +354,6 @@ class Compiler { Val* compile_mul(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_imul64(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_div(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_shlv(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_sarv(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_shrv(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_shl(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_sar(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_shr(const goos::Object& form, const goos::Object& rest, Env* env); diff --git a/goalc/compiler/compilation/Atoms.cpp b/goalc/compiler/compilation/Atoms.cpp index ca83bce137..1ff10fafa1 100644 --- a/goalc/compiler/compilation/Atoms.cpp +++ b/goalc/compiler/compilation/Atoms.cpp @@ -112,9 +112,6 @@ static const std::unordered_map< {"*", &Compiler::compile_mul}, {"imul64", &Compiler::compile_imul64}, {"/", &Compiler::compile_div}, - {"shlv", &Compiler::compile_shlv}, - {"shrv", &Compiler::compile_shrv}, - {"sarv", &Compiler::compile_sarv}, {"shl", &Compiler::compile_shl}, {"shr", &Compiler::compile_shr}, {"sar", &Compiler::compile_sar}, diff --git a/goalc/compiler/compilation/Math.cpp b/goalc/compiler/compilation/Math.cpp index 2c7c3cac3f..78b1ef1a2c 100644 --- a/goalc/compiler/compilation/Math.cpp +++ b/goalc/compiler/compilation/Math.cpp @@ -396,30 +396,6 @@ Val* Compiler::compile_div(const goos::Object& form, const goos::Object& rest, E return get_none(); } -Val* Compiler::compile_shlv(const goos::Object& form, const goos::Object& rest, Env* env) { - auto args = get_va(form, rest); - va_check(form, args, {{}, {}}, {}); - auto first = compile_error_guard(args.unnamed.at(0), env)->to_gpr(env); - auto second = compile_error_guard(args.unnamed.at(1), env)->to_gpr(env); - return compile_variable_shift(form, first, second, env, IntegerMathKind::SHLV_64); -} - -Val* Compiler::compile_sarv(const goos::Object& form, const goos::Object& rest, Env* env) { - auto args = get_va(form, rest); - va_check(form, args, {{}, {}}, {}); - auto first = compile_error_guard(args.unnamed.at(0), env)->to_gpr(env); - auto second = compile_error_guard(args.unnamed.at(1), env)->to_gpr(env); - return compile_variable_shift(form, first, second, env, IntegerMathKind::SARV_64); -} - -Val* Compiler::compile_shrv(const goos::Object& form, const goos::Object& rest, Env* env) { - auto args = get_va(form, rest); - va_check(form, args, {{}, {}}, {}); - auto first = compile_error_guard(args.unnamed.at(0), env)->to_gpr(env); - auto second = compile_error_guard(args.unnamed.at(1), env)->to_gpr(env); - return compile_variable_shift(form, first, second, env, IntegerMathKind::SHRV_64); -} - Val* Compiler::compile_variable_shift(const goos::Object& form, const RegVal* in, const RegVal* sa, @@ -449,35 +425,50 @@ Val* Compiler::compile_variable_shift(const goos::Object& form, Val* Compiler::compile_shl(const goos::Object& form, const goos::Object& rest, Env* env) { auto args = get_va(form, rest); - va_check(form, args, {{}, {goos::ObjectType::INTEGER}}, {}); + va_check(form, args, {{}, {}}, {}); auto first = compile_error_guard(args.unnamed.at(0), env)->to_gpr(env); - auto sa = args.unnamed.at(1).as_int(); - if (sa < 0 || sa > 64) { - throw_compiler_error(form, "Cannot shift by more than 64, or by a negative amount."); + int64_t constant_sa = -1; + if (try_getting_constant_integer(args.unnamed.at(1), &constant_sa, env)) { + if (constant_sa < 0 || constant_sa > 64) { + throw_compiler_error(form, "Cannot shift by more than 64, or by a negative amount."); + } + return compile_fixed_shift(form, first, constant_sa, env, IntegerMathKind::SHL_64); + } else { + auto second = compile_error_guard(args.unnamed.at(1), env)->to_gpr(env); + return compile_variable_shift(form, first, second, env, IntegerMathKind::SHLV_64); } - return compile_fixed_shift(form, first, sa, env, IntegerMathKind::SHL_64); } Val* Compiler::compile_shr(const goos::Object& form, const goos::Object& rest, Env* env) { auto args = get_va(form, rest); - va_check(form, args, {{}, {goos::ObjectType::INTEGER}}, {}); + va_check(form, args, {{}, {}}, {}); auto first = compile_error_guard(args.unnamed.at(0), env)->to_gpr(env); - auto sa = args.unnamed.at(1).as_int(); - if (sa < 0 || sa > 64) { - throw_compiler_error(form, "Cannot shift by more than 64, or by a negative amount"); + int64_t constant_sa = -1; + if (try_getting_constant_integer(args.unnamed.at(1), &constant_sa, env)) { + if (constant_sa < 0 || constant_sa > 64) { + throw_compiler_error(form, "Cannot shift by more than 64, or by a negative amount."); + } + return compile_fixed_shift(form, first, constant_sa, env, IntegerMathKind::SHR_64); + } else { + auto second = compile_error_guard(args.unnamed.at(1), env)->to_gpr(env); + return compile_variable_shift(form, first, second, env, IntegerMathKind::SHRV_64); } - return compile_fixed_shift(form, first, sa, env, IntegerMathKind::SHR_64); } Val* Compiler::compile_sar(const goos::Object& form, const goos::Object& rest, Env* env) { auto args = get_va(form, rest); - va_check(form, args, {{}, {goos::ObjectType::INTEGER}}, {}); + va_check(form, args, {{}, {}}, {}); auto first = compile_error_guard(args.unnamed.at(0), env)->to_gpr(env); - auto sa = args.unnamed.at(1).as_int(); - if (sa < 0 || sa > 64) { - throw_compiler_error(form, "Cannot shift by more than 64, or by a negative amount"); + int64_t constant_sa = -1; + if (try_getting_constant_integer(args.unnamed.at(1), &constant_sa, env)) { + if (constant_sa < 0 || constant_sa > 64) { + throw_compiler_error(form, "Cannot shift by more than 64, or by a negative amount."); + } + return compile_fixed_shift(form, first, constant_sa, env, IntegerMathKind::SAR_64); + } else { + auto second = compile_error_guard(args.unnamed.at(1), env)->to_gpr(env); + return compile_variable_shift(form, first, second, env, IntegerMathKind::SARV_64); } - return compile_fixed_shift(form, first, sa, env, IntegerMathKind::SAR_64); } Val* Compiler::compile_fixed_shift(const goos::Object& form, diff --git a/test/decompiler/test_FormExpressionBuild.cpp b/test/decompiler/test_FormExpressionBuild.cpp index 7b6da961ea..f6ee67a60b 100644 --- a/test/decompiler/test_FormExpressionBuild.cpp +++ b/test/decompiler/test_FormExpressionBuild.cpp @@ -422,7 +422,7 @@ TEST_F(FormRegressionTest, ExprSizeOfType) { " daddiu sp, sp, 16"; std::string type = "(function type uint)"; - std::string expected = "(logand (l.d L346) (+ (sll (-> a0-1 allocated-length) 2) 43))"; + std::string expected = "(logand (l.d L346) (+ (shl (-> a0-1 allocated-length) 2) 43))"; test_with_expr(func, type, expected, false, ""); } @@ -684,7 +684,7 @@ TEST_F(FormRegressionTest, ExprPairMethod4) { " (set! v0-0 1)\n" " (while\n" " (and (!= v1-1 '()) " - " (<0.si (sll (the-as uint v1-1) 62))\n" + " (<0.si (shl (the-as uint v1-1) 62))\n" " )\n" " (set! v0-0 (+ v0-0 1))\n" " (set! v1-1 (cdr v1-1))\n" @@ -1548,7 +1548,7 @@ TEST_F(FormRegressionTest, ExprSort) { " (set! s3-0 gp-0)\n" " (while\n" " (not\n" - " (or (= (cdr s3-0) (quote ())) (>=0.si (sll (the-as uint (cdr s3-0)) 62)))\n" + " (or (= (cdr s3-0) (quote ())) (>=0.si (shl (the-as uint (cdr s3-0)) 62)))\n" " )\n" " (set! s2-0 (car s3-0))\n" " (set! s1-0 (car (cdr s3-0)))\n" @@ -2172,7 +2172,7 @@ TEST_F(FormRegressionTest, ExprPrintTreeBitmask) { " (format (quote #t) L323)\n" " (format (quote #t) L322)\n" " )\n" - " (set! gp-0 (srl (the-as uint gp-0) 1))\n" + " (set! gp-0 (shr (the-as uint gp-0) 1))\n" " (set! s4-0 (+ s4-0 1))\n" " )\n" " (set! v1-3 (quote #f))\n" diff --git a/test/decompiler/test_FormExpressionBuildLong.cpp b/test/decompiler/test_FormExpressionBuildLong.cpp index 07ebf6cdcc..f3548c9504 100644 --- a/test/decompiler/test_FormExpressionBuildLong.cpp +++ b/test/decompiler/test_FormExpressionBuildLong.cpp @@ -669,7 +669,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " (set!\n" " v1-42\n" " (+\n" - " (sll (the-as uint s5-8) 4)\n" + " (shl (the-as uint s5-8) 4)\n" " (the-as int (the-as (array uint128) gp-0))\n" " )\n" " )\n" @@ -1322,7 +1322,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " (set!\n" " v1-42\n" " (+\n" - " (sll (the-as uint s5-8) 4)\n" + " (shl (the-as uint s5-8) 4)\n" " (the-as int (the-as (array uint128) gp-0))\n" " )\n" " )\n" diff --git a/test/goalc/source_templates/arithmetic/ash.static.gc b/test/goalc/source_templates/arithmetic/ash.static.gc index e93ae2c3ec..04c38b0c85 100644 --- a/test/goalc/source_templates/arithmetic/ash.static.gc +++ b/test/goalc/source_templates/arithmetic/ash.static.gc @@ -1,8 +1,8 @@ (defun ash ((value integer) (shift-amount integer)) (declare (inline)) (if (> shift-amount 0) - (shlv value shift-amount) - (sarv value (- shift-amount)) + (shl value shift-amount) + (sar value (- shift-amount)) ) ) diff --git a/test/goalc/source_templates/arithmetic/shiftvs.static.gc b/test/goalc/source_templates/arithmetic/shiftvs.static.gc index 09f438eafa..07531eb51a 100644 --- a/test/goalc/source_templates/arithmetic/shiftvs.static.gc +++ b/test/goalc/source_templates/arithmetic/shiftvs.static.gc @@ -1,2 +1,7 @@ -(+ (shlv 2 3) (shlv 1 0) (shlv 0 4) (shrv 2 3) (shrv 10 2) (shlv -2 1) (sarv -16 2)) + +(let ((one 1) + (two 2)) + (+ (shl 2 3) (shl 1 0) (shl 0 4) (shr 2 3) (shr 10 two) (shl -2 one) (sar -16 two)) + ) + ;; 16 1 0 0 2 -4 -4 \ No newline at end of file diff --git a/tools/dgo_unpacker.cpp b/tools/dgo_unpacker.cpp index cd22445d66..c3453ec2e7 100644 --- a/tools/dgo_unpacker.cpp +++ b/tools/dgo_unpacker.cpp @@ -28,7 +28,7 @@ int main(int argc, char** argv) { // write files: for (auto& entry : dgo.entries()) { file_util::write_binary_file(file_util::combine_path(out_path, entry.unique_name), - (void*)entry.data.data(), entry.data.size()); + (const void*)entry.data.data(), entry.data.size()); } } From 34cc4589cea387fe270d371a7999e4c628554819 Mon Sep 17 00:00:00 2001 From: water Date: Tue, 2 Feb 2021 20:55:14 -0500 Subject: [PATCH 02/10] fix bad argument variable names --- decompiler/analysis/variable_naming.cpp | 17 +++++++++++++++++ test/decompiler/test_FormExpressionBuild.cpp | 2 +- .../decompiler/test_FormExpressionBuildLong.cpp | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/decompiler/analysis/variable_naming.cpp b/decompiler/analysis/variable_naming.cpp index 09479522db..650eafc489 100644 --- a/decompiler/analysis/variable_naming.cpp +++ b/decompiler/analysis/variable_naming.cpp @@ -436,6 +436,23 @@ void SSA::remap() { } }; std::unordered_map used_vars; + // we do this in two passes. the first pass collects only the B0 variables and adds those first, + // so these remain index 0 (expected by later decompiler passes) + for (auto& block : blocks) { + assert(block.phis.empty()); + for (auto& instr : block.ins) { + if (instr.dst.has_value() && map.var_id(*instr.dst) == 0) { + used_vars[instr.dst->reg()].insert(map.var_id(*instr.dst)); + } + for (auto& src : instr.src) { + if (map.var_id(src) == 0) { + used_vars[src.reg()].insert(map.var_id(src)); + } + } + } + } + + // and the second pass grabs all of them for (auto& block : blocks) { assert(block.phis.empty()); for (auto& instr : block.ins) { diff --git a/test/decompiler/test_FormExpressionBuild.cpp b/test/decompiler/test_FormExpressionBuild.cpp index f6ee67a60b..d8d73b73b8 100644 --- a/test/decompiler/test_FormExpressionBuild.cpp +++ b/test/decompiler/test_FormExpressionBuild.cpp @@ -422,7 +422,7 @@ TEST_F(FormRegressionTest, ExprSizeOfType) { " daddiu sp, sp, 16"; std::string type = "(function type uint)"; - std::string expected = "(logand (l.d L346) (+ (shl (-> a0-1 allocated-length) 2) 43))"; + std::string expected = "(logand (l.d L346) (+ (shl (-> a0-0 allocated-length) 2) 43))"; test_with_expr(func, type, expected, false, ""); } diff --git a/test/decompiler/test_FormExpressionBuildLong.cpp b/test/decompiler/test_FormExpressionBuildLong.cpp index f3548c9504..3618eea4d5 100644 --- a/test/decompiler/test_FormExpressionBuildLong.cpp +++ b/test/decompiler/test_FormExpressionBuildLong.cpp @@ -1946,7 +1946,7 @@ TEST_F(FormRegressionTest, ExprValid) { " (else (quote #t))\n" " )\n" " )\n" - " ((and a3-2 (not gp-0)) (quote #t))\n" + " ((and a3-0 (not gp-0)) (quote #t))\n" " (else\n" " (cond\n" " ((= s3-0 structure)\n" From 4d1fbf9b07a2875520fa2d295421e5d7b8315d53 Mon Sep 17 00:00:00 2001 From: water Date: Tue, 2 Feb 2021 21:22:05 -0500 Subject: [PATCH 03/10] fix missing variable issue --- decompiler/IR2/Env.cpp | 63 +------------------- test/decompiler/FormRegressionTest.cpp | 25 ++++++++ test/decompiler/FormRegressionTest.h | 8 +++ test/decompiler/test_FormExpressionBuild.cpp | 46 ++++++++++++++ 4 files changed, 80 insertions(+), 62 deletions(-) diff --git a/decompiler/IR2/Env.cpp b/decompiler/IR2/Env.cpp index 03b29cd2b1..e7cc0bd1d5 100644 --- a/decompiler/IR2/Env.cpp +++ b/decompiler/IR2/Env.cpp @@ -57,67 +57,6 @@ std::string Env::print_local_var_types(const Form* top_level_form) const { entries.push_back(fmt::format("{}: {}", x.name(), x.type.typespec().print())); } - if (top_level_form) { - VariableSet var_set; - top_level_form->collect_vars(var_set); - - // we want to sort them for easier reading: - std::vector> vars; - - for (auto& x : var_set) { - vars.push_back(std::make_pair(get_ssa_var(x), x)); - } - - std::sort(vars.begin(), vars.end(), - [](const std::pair& a, const std::pair& b) { - return a.first < b.first; - }); - - RegId* prev = nullptr; - for (auto& x : vars) { - // sorted by ssa var and there are likely duplicates of Variables and SSA vars, only print - // unique ssa variables. - if (prev && x.first == *prev) { - continue; - } - prev = &x.first; - auto& map = x.second.mode() == VariableMode::WRITE ? m_var_names.write_vars.at(x.second.reg()) - : m_var_names.read_vars.at(x.second.reg()); - auto& info = map.at(x.first.id); - - if (info.initialized) { - entries.push_back(fmt::format("{}: {}", info.name(), info.type.typespec().print())); - } else { - assert(false); - } - } - } else { - std::unordered_map, Register::hash> printed; - - for (auto& reg_info : m_var_names.read_vars) { - auto& reg_printed = printed[reg_info.first]; - for (int var_id = 0; var_id < int(reg_info.second.size()); var_id++) { - auto& info = reg_info.second.at(var_id); - if (info.initialized) { - reg_printed.insert(var_id); - entries.push_back(fmt::format("{}: {}", info.name(), info.type.typespec().print())); - } - } - } - - for (auto& reg_info : m_var_names.write_vars) { - auto& reg_printed = printed[reg_info.first]; - for (int var_id = 0; var_id < int(reg_info.second.size()); var_id++) { - auto& info = reg_info.second.at(var_id); - if (info.initialized) { - if (reg_printed.find(var_id) == reg_printed.end()) { - entries.push_back(fmt::format("{}: {}", info.name(), info.type.typespec().print())); - } - } - } - } - } - int max_len = 0; for (auto& entry : entries) { if (int(entry.length()) > max_len) { @@ -228,7 +167,7 @@ goos::Object Env::local_var_type_list(const Form* top_level_form, int count = 0; for (auto& x : vars) { if (x.reg_id.reg.get_kind() == Reg::GPR && x.reg_id.reg.get_gpr() < Reg::A0 + nargs_to_ignore && - x.reg_id.reg.get_gpr() >= Reg::A0) { + x.reg_id.reg.get_gpr() >= Reg::A0 && x.reg_id.id == 0) { continue; } count++; diff --git a/test/decompiler/FormRegressionTest.cpp b/test/decompiler/FormRegressionTest.cpp index a87af3273c..5a59bba509 100644 --- a/test/decompiler/FormRegressionTest.cpp +++ b/test/decompiler/FormRegressionTest.cpp @@ -4,6 +4,7 @@ #include "decompiler/analysis/reg_usage.h" #include "decompiler/analysis/cfg_builder.h" #include "decompiler/analysis/expression_build.h" +#include "decompiler/analysis/final_output.h" #include "common/goos/PrettyPrinter.h" #include "decompiler/IR2/Form.h" #include "third-party/json.hpp" @@ -180,6 +181,30 @@ void FormRegressionTest::test(const std::string& code, EXPECT_TRUE(expected_form == actual_form); } +void FormRegressionTest::test_final_function( + const std::string& code, + const std::string& type, + const std::string& expected, + bool allow_pairs, + const std::vector>& strings, + const std::unordered_map>& hints) { + auto ts = dts->parse_type_spec(type); + auto test = make_function(code, ts, true, allow_pairs, "", strings, hints); + ASSERT_TRUE(test); + auto expected_form = + pretty_print::get_pretty_printer_reader().read_from_string(expected, false).as_pair()->car; + ASSERT_TRUE(test->func.ir2.top_form); + auto final = final_defun_out(test->func, test->func.ir2.env, *dts); + auto actual_form = + pretty_print::get_pretty_printer_reader().read_from_string(final, false).as_pair()->car; + if (expected_form != actual_form) { + printf("Got:\n%s\n\nExpected\n%s\n", pretty_print::to_string(actual_form).c_str(), + pretty_print::to_string(expected_form).c_str()); + } + + EXPECT_TRUE(expected_form == actual_form); +} + std::unordered_map> FormRegressionTest::parse_hint_json( const std::string& in) { std::unordered_map> out; diff --git a/test/decompiler/FormRegressionTest.h b/test/decompiler/FormRegressionTest.h index f88a76a9ce..b463f408b6 100644 --- a/test/decompiler/FormRegressionTest.h +++ b/test/decompiler/FormRegressionTest.h @@ -66,5 +66,13 @@ class FormRegressionTest : public ::testing::Test { test(code, type, expected, true, allow_pairs, method_name, strings, hints); } + void test_final_function( + const std::string& code, + const std::string& type, + const std::string& expected, + bool allow_pairs = false, + const std::vector>& strings = {}, + const std::unordered_map>& hints = {}); + std::unordered_map> parse_hint_json(const std::string& in); }; \ No newline at end of file diff --git a/test/decompiler/test_FormExpressionBuild.cpp b/test/decompiler/test_FormExpressionBuild.cpp index d8d73b73b8..f68b9ff736 100644 --- a/test/decompiler/test_FormExpressionBuild.cpp +++ b/test/decompiler/test_FormExpressionBuild.cpp @@ -469,6 +469,52 @@ TEST_F(FormRegressionTest, ExprBasicTypeP) { test_with_expr(func, type, expected); } +TEST_F(FormRegressionTest, FinalBasicTypeP) { + std::string func = + " sll r0, r0, 0\n" + "L285:\n" + " lwu v1, -4(a0)\n" + " lw a0, object(s7)\n" + + "L286:\n" + " bne v1, a1, L287\n" + " or a2, s7, r0\n" + + " daddiu v1, s7, #t\n" + " or v0, v1, r0\n" + " beq r0, r0, L288\n" + " sll r0, r0, 0\n" + + " or v1, r0, r0\n" + "L287:\n" + " lwu v1, 4(v1)\n" + " bne v1, a0, L286\n" + " sll r0, r0, 0\n" + " or v0, s7, r0\n" + "L288:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function basic type symbol)"; + std::string expected = + "(defun test-function ((a0-0 basic) (a1-0 type))\n" + " (local-vars\n" + " (v1-0 type)\n" + " (a0-1 type)\n" + " (a2-0 symbol)\n" + " )\n" + " (begin\n" + " (set! v1-0 (-> a0-0 type))\n" + " (set! a0-1 object)\n" + " (until\n" + " (begin (set! v1-0 (-> v1-0 parent)) (= v1-0 a0-1))\n" + " (if (= v1-0 a1-0) (return (quote #t) (set! v1-0 0)))\n" + " )\n" + " (quote #f)\n" + " )\n" + " )"; + test_final_function(func, type, expected); +} + TEST_F(FormRegressionTest, ExprTypeTypep) { std::string func = " sll r0, r0, 0\n" From 24b799c7fc435b086bf37e18a139462286e1730b Mon Sep 17 00:00:00 2001 From: water Date: Tue, 2 Feb 2021 21:35:52 -0500 Subject: [PATCH 04/10] small missing things --- decompiler/IR2/AtomicOpForm.cpp | 7 +++++++ decompiler/IR2/Form.cpp | 2 ++ decompiler/IR2/FormExpressionAnalysis.cpp | 3 +++ decompiler/IR2/IR2_common.h | 1 + 4 files changed, 13 insertions(+) diff --git a/decompiler/IR2/AtomicOpForm.cpp b/decompiler/IR2/AtomicOpForm.cpp index 4406893ce1..028fad04ad 100644 --- a/decompiler/IR2/AtomicOpForm.cpp +++ b/decompiler/IR2/AtomicOpForm.cpp @@ -90,6 +90,13 @@ FormElement* SetVarConditionOp::get_as_form(FormPool& pool, const Env& env) cons FormElement* StoreOp::get_as_form(FormPool& pool, const Env& env) const { if (env.has_type_analysis()) { + if (m_addr.is_identity() && m_addr.get_arg(0).is_sym_val()) { + auto val = pool.alloc_single_element_form(nullptr, m_value.as_expr(), + m_my_idx); + auto src = pool.alloc_single_element_form(nullptr, m_addr, m_my_idx); + return pool.alloc_element(src, val); + } + IR2_RegOffset ro; if (get_as_reg_offset(m_addr, &ro)) { auto& input_type = env.get_types_before_op(m_my_idx).get(ro.reg); diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index 68536e0e2a..5564a99ba2 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -1071,6 +1071,8 @@ std::string fixed_operator_to_string(FixedOperatorKind kind) { return "shl"; case FixedOperatorKind::SHR: return "shr"; + case FixedOperatorKind::SAR: + return "sar"; case FixedOperatorKind::CAR: return "car"; case FixedOperatorKind::CDR: diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index 52530c58a8..d7925f9b28 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -431,6 +431,9 @@ void SimpleExpressionElement::update_from_stack(const Env& env, case SimpleExpression::Kind::RIGHT_SHIFT_LOGIC: update_from_stack_force_ui_2(env, FixedOperatorKind::SHR, pool, stack, result); break; + case SimpleExpression::Kind::RIGHT_SHIFT_ARITH: + update_from_stack_force_ui_2(env, FixedOperatorKind::SAR, pool, stack, result); + break; case SimpleExpression::Kind::MUL_UNSIGNED: update_from_stack_force_ui_2(env, FixedOperatorKind::MULTIPLICATION, pool, stack, result); break; diff --git a/decompiler/IR2/IR2_common.h b/decompiler/IR2/IR2_common.h index dfcb7b5302..ebada81173 100644 --- a/decompiler/IR2/IR2_common.h +++ b/decompiler/IR2/IR2_common.h @@ -107,6 +107,7 @@ enum class FixedOperatorKind { LOGNOT, SHL, SHR, + SAR, CAR, CDR, NEW, From d71a3d0645441307bb3a6b90f3dc8c965dc59930 Mon Sep 17 00:00:00 2001 From: water Date: Wed, 3 Feb 2021 17:36:18 -0500 Subject: [PATCH 05/10] wip --- decompiler/IR2/AtomicOpForm.cpp | 6 +- decompiler/IR2/Form.cpp | 10 +- decompiler/IR2/Form.h | 4 +- decompiler/IR2/FormExpressionAnalysis.cpp | 153 +++++++++++++++------- decompiler/IR2/IR2_common.h | 1 + 5 files changed, 121 insertions(+), 53 deletions(-) diff --git a/decompiler/IR2/AtomicOpForm.cpp b/decompiler/IR2/AtomicOpForm.cpp index 028fad04ad..8bd99089bf 100644 --- a/decompiler/IR2/AtomicOpForm.cpp +++ b/decompiler/IR2/AtomicOpForm.cpp @@ -245,8 +245,10 @@ FormElement* LoadVarOp::get_as_form(FormPool& pool, const Env& env) const { } assert(rd.tokens.back().kind == FieldReverseLookupOutput::Token::Kind::VAR_IDX); - auto load = pool.alloc_single_element_form(nullptr, ro.var, tokens, - input_type.get_multiplier()); + // we pass along the register offset because code generation seems to be a bit + // different in different cases. + auto load = pool.alloc_single_element_form( + nullptr, ro.var, tokens, input_type.get_multiplier(), ro.offset); return pool.alloc_element(m_dst, load, true); } } diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index 5564a99ba2..0a8191d643 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -1081,6 +1081,8 @@ std::string fixed_operator_to_string(FixedOperatorKind kind) { return "new"; case FixedOperatorKind::OBJECT_NEW: return "object-new"; + case FixedOperatorKind::TYPE_NEW: + return "type-new"; default: assert(false); } @@ -1325,8 +1327,12 @@ void DynamicMethodAccess::get_modified_regs(RegSet&) const {} ///////////////////////////// ArrayFieldAccess::ArrayFieldAccess(Variable source, const std::vector& deref_tokens, - int expected_stride) - : m_source(source), m_deref_tokens(deref_tokens), m_expected_stride(expected_stride) {} + int expected_stride, + int constant_offset) + : m_source(source), + m_deref_tokens(deref_tokens), + m_expected_stride(expected_stride), + m_constant_offset(constant_offset) {} goos::Object ArrayFieldAccess::to_form(const Env& env) const { std::vector elts; diff --git a/decompiler/IR2/Form.h b/decompiler/IR2/Form.h index 62ca38b0af..13987dd70c 100644 --- a/decompiler/IR2/Form.h +++ b/decompiler/IR2/Form.h @@ -791,7 +791,8 @@ class ArrayFieldAccess : public FormElement { public: ArrayFieldAccess(Variable source, const std::vector& deref_tokens, - int expected_stride); + int expected_stride, + int constant_offset); goos::Object to_form(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; @@ -806,6 +807,7 @@ class ArrayFieldAccess : public FormElement { Variable m_source; std::vector m_deref_tokens; int m_expected_stride = -1; + int m_constant_offset = -1; }; class GetMethodElement : public FormElement { diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index d7925f9b28..a40e7c0c15 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -275,9 +275,21 @@ void SimpleExpressionElement::update_from_stack_force_si_2(const Env& env, FormStack& stack, std::vector* result) { auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); - auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); + bool arg1_i = true; + bool arg1_reg = m_expr.get_arg(1).is_var(); + if (arg1_reg) { + arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); + } else { + assert(m_expr.get_arg(1).is_int()); + } - auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); + std::vector args; + if (arg1_reg) { + args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); + } else { + args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); + args.push_back(pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); + } if (!arg0_i) { args.at(0) = pool.alloc_single_element_form(nullptr, TypeSpec("int"), args.at(0)); @@ -432,7 +444,7 @@ void SimpleExpressionElement::update_from_stack(const Env& env, update_from_stack_force_ui_2(env, FixedOperatorKind::SHR, pool, stack, result); break; case SimpleExpression::Kind::RIGHT_SHIFT_ARITH: - update_from_stack_force_ui_2(env, FixedOperatorKind::SAR, pool, stack, result); + update_from_stack_force_si_2(env, FixedOperatorKind::SAR, pool, stack, result); break; case SimpleExpression::Kind::MUL_UNSIGNED: update_from_stack_force_ui_2(env, FixedOperatorKind::MULTIPLICATION, pool, stack, result); @@ -555,6 +567,13 @@ void FunctionCallElement::update_from_stack(const Env& env, GenericOperator::make_fixed(FixedOperatorKind::OBJECT_NEW), new_args); result->push_back(new_op); return; + } + if (name == "new" && type_1 == "type") { + std::vector new_args = dynamic_cast(new_form)->elts(); + auto new_op = pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::TYPE_NEW), new_args); + result->push_back(new_op); + return; } else if (name == "new") { constexpr int allocation = 2; constexpr int type_for_arg = 3; @@ -1024,55 +1043,93 @@ void ArrayFieldAccess::update_from_stack(const Env& env, FormStack& stack, std::vector* result) { auto new_val = stack.pop_reg(m_source, {}, env); - int power_of_two = 0; - if (m_expected_stride == 1) { - // reg0 is idx - auto reg0_matcher = - Matcher::match_or({Matcher::any_reg(0), Matcher::cast("int", Matcher::any_reg(0))}); - // reg1 is base - auto reg1_matcher = - Matcher::match_or({Matcher::any_reg(1), Matcher::cast("int", Matcher::any_reg(1))}); - auto matcher = Matcher::fixed_op(FixedOperatorKind::ADDITION, {reg0_matcher, reg1_matcher}); - auto match_result = match(matcher, new_val); - if (!match_result.matched) { - throw std::runtime_error("Couldn't match ArrayFieldAccess (stride 1) values: " + - new_val->to_string(env)); - } - auto idx = match_result.maps.regs.at(0); - auto base = match_result.maps.regs.at(1); - assert(idx.has_value() && base.has_value()); - - std::vector tokens = m_deref_tokens; - tokens.push_back(DerefToken::make_int_expr(var_to_form(idx.value(), pool))); - - auto deref = pool.alloc_element(var_to_form(base.value(), pool), false, tokens); - result->push_back(deref); - } else if (is_power_of_two(m_expected_stride, &power_of_two)) { - // (+ (sll (the-as uint a1-0) 2) (the-as int a0-0)) - auto reg0_matcher = - Matcher::match_or({Matcher::any_reg(0), Matcher::cast("uint", Matcher::any_reg(0))}); - auto reg1_matcher = - Matcher::match_or({Matcher::any_reg(1), Matcher::cast("int", Matcher::any_reg(1))}); - auto sll_matcher = - Matcher::fixed_op(FixedOperatorKind::SHL, {reg0_matcher, Matcher::integer(power_of_two)}); - auto matcher = Matcher::fixed_op(FixedOperatorKind::ADDITION, {sll_matcher, reg1_matcher}); - auto match_result = match(matcher, new_val); - if (!match_result.matched) { - throw std::runtime_error("Couldn't match ArrayFieldAccess (stride power of 2) values: " + - new_val->to_string(env)); - } - auto idx = match_result.maps.regs.at(0); - auto base = match_result.maps.regs.at(1); - assert(idx.has_value() && base.has_value()); - std::vector tokens = m_deref_tokens; - tokens.push_back(DerefToken::make_int_expr(var_to_form(idx.value(), pool))); + if (m_constant_offset == 0) { + if (m_expected_stride == 1) { + throw std::runtime_error("One case, not yet implemented (no offset)"); + } else if (is_power_of_two(m_expected_stride, &power_of_two)) { + // reg0 is base + // reg1 is idx + + auto reg0_matcher = + Matcher::match_or({Matcher::cast("uint", Matcher::any(0)), Matcher::any(0)}); + auto reg1_matcher = + Matcher::match_or({Matcher::cast("uint", Matcher::any(1)), Matcher::any(1)}); + auto sll_matcher = + Matcher::fixed_op(FixedOperatorKind::SHL, {reg1_matcher, Matcher::integer(power_of_two)}); + sll_matcher = Matcher::match_or({Matcher::cast("uint", sll_matcher), sll_matcher}); + auto matcher = Matcher::fixed_op(FixedOperatorKind::ADDITION, {reg0_matcher, sll_matcher}); + auto match_result = match(matcher, new_val); + if (!match_result.matched) { + fmt::print("power {}\n", power_of_two); + throw std::runtime_error( + "Couldn't match ArrayFieldAccess (stride power of 2, 0 offset) values: " + + new_val->to_string(env)); + } + + auto idx = match_result.maps.forms.at(1); + auto base = match_result.maps.forms.at(0); + assert(idx && base); + + std::vector tokens = m_deref_tokens; + tokens.push_back(DerefToken::make_int_expr(idx)); - auto deref = pool.alloc_element(var_to_form(base.value(), pool), false, tokens); - result->push_back(deref); + auto deref = pool.alloc_element(base, false, tokens); + result->push_back(deref); + } else { + throw std::runtime_error("Not power of two case, not yet implemented (no offset)"); + } } else { - throw std::runtime_error("Not power of two case, not yet implemented"); + if (m_expected_stride == 1) { + // reg0 is idx + auto reg0_matcher = + Matcher::match_or({Matcher::any_reg(0), Matcher::cast("int", Matcher::any_reg(0))}); + // reg1 is base + auto reg1_matcher = + Matcher::match_or({Matcher::any_reg(1), Matcher::cast("int", Matcher::any_reg(1))}); + auto matcher = Matcher::fixed_op(FixedOperatorKind::ADDITION, {reg0_matcher, reg1_matcher}); + auto match_result = match(matcher, new_val); + if (!match_result.matched) { + throw std::runtime_error("Couldn't match ArrayFieldAccess (stride 1) values: " + + new_val->to_string(env)); + } + auto idx = match_result.maps.regs.at(0); + auto base = match_result.maps.regs.at(1); + assert(idx.has_value() && base.has_value()); + + std::vector tokens = m_deref_tokens; + tokens.push_back(DerefToken::make_int_expr(var_to_form(idx.value(), pool))); + + auto deref = pool.alloc_element(var_to_form(base.value(), pool), false, tokens); + result->push_back(deref); + } else if (is_power_of_two(m_expected_stride, &power_of_two)) { + // (+ (sll (the-as uint a1-0) 2) (the-as int a0-0)) + // (+ gp-0 (the-as uint (shl (the-as uint (shl (the-as uint s4-0) 2)) 2))) + auto reg0_matcher = + Matcher::match_or({Matcher::any_reg(0), Matcher::cast("uint", Matcher::any_reg(0))}); + auto reg1_matcher = + Matcher::match_or({Matcher::any_reg(1), Matcher::cast("int", Matcher::any_reg(1))}); + auto sll_matcher = + Matcher::fixed_op(FixedOperatorKind::SHL, {reg0_matcher, Matcher::integer(power_of_two)}); + auto matcher = Matcher::fixed_op(FixedOperatorKind::ADDITION, {sll_matcher, reg1_matcher}); + auto match_result = match(matcher, new_val); + if (!match_result.matched) { + throw std::runtime_error("Couldn't match ArrayFieldAccess (stride power of 2) values: " + + new_val->to_string(env)); + } + auto idx = match_result.maps.regs.at(0); + auto base = match_result.maps.regs.at(1); + assert(idx.has_value() && base.has_value()); + + std::vector tokens = m_deref_tokens; + tokens.push_back(DerefToken::make_int_expr(var_to_form(idx.value(), pool))); + + auto deref = pool.alloc_element(var_to_form(base.value(), pool), false, tokens); + result->push_back(deref); + } else { + throw std::runtime_error("Not power of two case, not yet implemented (offset)"); + } } } diff --git a/decompiler/IR2/IR2_common.h b/decompiler/IR2/IR2_common.h index ebada81173..173b713d6e 100644 --- a/decompiler/IR2/IR2_common.h +++ b/decompiler/IR2/IR2_common.h @@ -112,6 +112,7 @@ enum class FixedOperatorKind { CDR, NEW, OBJECT_NEW, + TYPE_NEW, INVALID }; From 78b4131cc63f197326e3515b1f80228ff59ecb2f Mon Sep 17 00:00:00 2001 From: water Date: Wed, 3 Feb 2021 21:15:23 -0500 Subject: [PATCH 06/10] cleanup --- decompiler/IR2/AtomicOp.h | 3 + decompiler/IR2/AtomicOpForm.cpp | 2 +- decompiler/IR2/Form.cpp | 5 +- decompiler/IR2/Form.h | 88 ++++-- decompiler/IR2/FormExpressionAnalysis.cpp | 366 ++++++++++++++++------ decompiler/IR2/FormStack.cpp | 27 +- decompiler/IR2/FormStack.h | 7 +- decompiler/analysis/atomic_op_builder.cpp | 2 + decompiler/analysis/cfg_builder.cpp | 2 +- 9 files changed, 363 insertions(+), 139 deletions(-) diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index b5f07d53ce..c78767a350 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -355,10 +355,13 @@ class IR2_Condition { const SimpleAtom& src(int i) const { return m_src[i]; } ConditionElement* get_as_form(FormPool& pool, const Env& env, int my_idx) const; void collect_vars(VariableSet& vars) const; + void make_flipped() { m_flipped_eval = true; } + bool flipped() const { return m_flipped_eval; } private: Kind m_kind = Kind::INVALID; SimpleAtom m_src[2]; + bool m_flipped_eval = false; }; std::string get_condition_kind_name(IR2_Condition::Kind kind); diff --git a/decompiler/IR2/AtomicOpForm.cpp b/decompiler/IR2/AtomicOpForm.cpp index 8bd99089bf..34c952416d 100644 --- a/decompiler/IR2/AtomicOpForm.cpp +++ b/decompiler/IR2/AtomicOpForm.cpp @@ -45,7 +45,7 @@ ConditionElement* IR2_Condition::get_as_form(FormPool& pool, const Env& env, int for (int i = 0; i < get_condition_num_args(m_kind); i++) { vars[i] = m_src[i]; } - return pool.alloc_element(m_kind, vars[0], vars[1], consumed); + return pool.alloc_element(m_kind, vars[0], vars[1], consumed, m_flipped_eval); } FormElement* SetVarOp::get_as_form(FormPool& pool, const Env& env) const { diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index 0a8191d643..c2e3663dd8 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -356,8 +356,9 @@ void AtomicOpElement::get_modified_regs(RegSet& regs) const { ConditionElement::ConditionElement(IR2_Condition::Kind kind, std::optional src0, std::optional src1, - RegSet consumed) - : m_kind(kind), m_consumed(std::move(consumed)) { + RegSet consumed, + bool flipped) + : m_kind(kind), m_consumed(std::move(consumed)), m_flipped(flipped) { m_src[0] = src0; m_src[1] = src1; } diff --git a/decompiler/IR2/Form.h b/decompiler/IR2/Form.h index 13987dd70c..4906bf1268 100644 --- a/decompiler/IR2/Form.h +++ b/decompiler/IR2/Form.h @@ -30,13 +30,15 @@ class FormElement { virtual void collect_vars(VariableSet& vars) const = 0; virtual void get_modified_regs(RegSet& regs) const = 0; std::string to_string(const Env& env) const; + bool has_side_effects(); // push the result of this operation to the operation stack virtual void push_to_stack(const Env& env, FormPool& pool, FormStack& stack); virtual void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); protected: friend class Form; @@ -59,53 +61,62 @@ class SimpleExpressionElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; - void update_from_stack_identity(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); void update_from_stack_gpr_to_fpr(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); void update_from_stack_fpr_to_gpr(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); void update_from_stack_div_s(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); void update_from_stack_add_i(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); void update_from_stack_mult_si(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); void update_from_stack_lognot(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result); - + std::vector* result, + bool allow_side_effects); void update_from_stack_force_si_2(const Env& env, FixedOperatorKind kind, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); void update_from_stack_force_ui_2(const Env& env, FixedOperatorKind kind, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); void update_from_stack_copy_first_int_2(const Env& env, FixedOperatorKind kind, FormPool& pool, FormStack& stack, - std::vector* result); + std::vector* result, + bool allow_side_effects); const SimpleExpression& expr() const { return m_expr; } @@ -151,7 +162,8 @@ class LoadSourceElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; private: @@ -194,7 +206,8 @@ class SetVarElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; const Variable& dst() const { return m_dst; } @@ -259,7 +272,8 @@ class ConditionElement : public FormElement { ConditionElement(IR2_Condition::Kind kind, std::optional src0, std::optional src1, - RegSet consumed); + RegSet consumed, + bool flipped); goos::Object to_form(const Env& env) const override; goos::Object to_form_as_condition(const Env& env) const override; void apply(const std::function& f) override; @@ -269,7 +283,8 @@ class ConditionElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; void invert(); const RegSet& consume() const { return m_consumed; } @@ -278,6 +293,7 @@ class ConditionElement : public FormElement { IR2_Condition::Kind m_kind; std::optional m_src[2]; RegSet m_consumed; + bool m_flipped; }; /*! @@ -293,7 +309,8 @@ class FunctionCallElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; void get_modified_regs(RegSet& regs) const override; @@ -491,7 +508,8 @@ class ShortCircuitElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; }; @@ -534,7 +552,8 @@ class AbsElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; Variable source; RegSet consumed; @@ -563,7 +582,8 @@ class AshElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; }; @@ -584,7 +604,8 @@ class TypeOfElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; }; /*! @@ -681,7 +702,8 @@ class GenericElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; void push_to_stack(const Env& env, FormPool& pool, FormStack& stack) override; const GenericOperator& op() const { return m_head; } @@ -704,7 +726,8 @@ class CastElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; const TypeSpec& type() const { return m_type; } const Form* source() const { return m_source; } Form* source() { return m_source; } @@ -756,7 +779,8 @@ class DerefElement : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; bool is_addr_of() const { return m_is_addr_of; } @@ -780,7 +804,8 @@ class DynamicMethodAccess : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; private: @@ -800,7 +825,8 @@ class ArrayFieldAccess : public FormElement { void update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) override; + std::vector* result, + bool allow_side_effects) override; void get_modified_regs(RegSet& regs) const override; private: @@ -890,7 +916,11 @@ class Form { void apply_form(const std::function& f); void collect_vars(VariableSet& vars) const; - void update_children_from_stack(const Env& env, FormPool& pool, FormStack& stack); + void update_children_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + bool allow_side_effects); + bool has_side_effects(); void get_modified_regs(RegSet& regs) const; FormElement* parent_element = nullptr; diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index a40e7c0c15..1f54bc7414 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -9,21 +9,93 @@ * - check out if we can push/pop variables instead of registers? */ +/*! + * Basic idea: push partial expressions to the stack and pop them off as they are used. + * Leftovers are left on the stack and flushed out as set! + * But the challenge is knowing it's safe to pop something off of the stack. + * If the value is used after the read again, then it's not. + * + * The tricky situation is to accidentally generate this + * [simplified, we would never group like this, but similar things are possible] + * (+ s5 (begin (set! s5 z) (+ x y))) + * when the value of s5 used is after the (set! s5 z). + * + * To avoid that case, we make sure that anything after s5 in the expression cannot modify s5. If it + * does, we just don't do the expression building and leave it as smaller expressions. To accomplish + * this, we submit a batch of registers to pop, and the stack takes care of making sure this + * property will hold. + * + * But what about + * (+ (* s5 s4) (begin (set! s5 z) (+ x y))) + * Luckily this isn't a problem. The actual (* s5 s4) will only be inserted if the multiply + * instruction actually occurs before the second term in the outer addition. + * The issue only occurs when a pop fails and we just insert a variable name. + * In other words, this variable "insert" is the only that lets us bypass ordering. + * (which makes me wonder if I should have solved this by being more careful with that... + * but I have no immediate ideas unless we allow backtracking in popping which is bad) + * + * Now sometimes it's too hard to figure out exactly all of the variables we might pop and do + * it all in one batch. We pop one thing, but we know that there are registers it shouldn't modify. + * Instead of the barrier register approach, we do something easier: explicitly forbid + * "outside of expression register variable side effects". + * This means that you modify a register that's visible outside of the expression. Confusingly, + * memory access doesn't count as a side effect and register read/writes that are part of a + * chained together expression do not count. + * This is kind of a hacky workaround that I'd really like to remove eventually. + * I think it can basically be solved by being strategic in when we update from stack and we can + * avoid the highly general "update some unknown vector of unknown things from the stack" + */ + namespace decompiler { +bool Form::has_side_effects() { + bool has_side_effect = false; + apply([&](FormElement* elt) { + if (dynamic_cast(elt)) { + has_side_effect = true; + } + }); + return has_side_effect; +} + +bool FormElement::has_side_effects() { + bool has_side_effect = false; + apply([&](FormElement* elt) { + if (dynamic_cast(elt)) { + has_side_effect = true; + } + }); + return has_side_effect; +} + namespace { +/*! + * Create a form which represents a variable. + */ Form* var_to_form(const Variable& var, FormPool& pool) { return pool.alloc_single_element_form(nullptr, SimpleAtom::make_var(var)); } +/*! + * Pop values of off the expression stack + * @param vars : list of variables to pop. In order of source code evaluation. + * @param env : the decompilation environment + * @param pool : form allocation pool + * @param stack : stack to pop from + * @param output : list of locations to push results. + * @param consumes : if you have a different list of variables that are consumed by this operation. + */ void pop_helper(const std::vector& vars, const Env& env, FormPool& pool, FormStack& stack, const std::vector*>& output, + bool allow_side_effects, const std::optional& consumes = std::nullopt) { + // to submit to stack to attempt popping std::vector submit_regs; + // submit_reg[i] is for var submit_reg_to_var[i] std::vector submit_reg_to_var; // build submission for stack @@ -32,59 +104,70 @@ void pop_helper(const std::vector& vars, auto& ri = env.reg_use().op.at(var.idx()); RegSet consumes_to_use = consumes.value_or(ri.consumes); if (consumes_to_use.find(var.reg()) != consumes_to_use.end()) { - // could pop + // we consume the register, so it's safe to try popping. submit_reg_to_var.push_back(var_idx); submit_regs.push_back(var.reg()); - } else { - // no way to pop } } - // submit! - // auto result = stack.pop(submit_regs, env); + // submit and get a result! If the stack has nothing to pop, the result here may be nullptr. std::vector pop_result; - // loop in reverse. + // loop in reverse (later vals first) for (size_t i = submit_regs.size(); i-- > 0;) { // figure out what var we are: auto var_idx = submit_reg_to_var.at(i); - // anything _less or equal_ than this should be unmodified by the pop - // note - on the actual popped, pop_reg won't consider the destination. + // anything _less_ than this should be unmodified by the pop + // it's fine to modify yourself in your pop. RegSet pop_barrier_regs; for (size_t j = 0; j < var_idx; j++) { pop_barrier_regs.insert(vars.at(j).reg()); } - pop_result.push_back(stack.pop_reg(submit_regs.at(i), pop_barrier_regs, env)); + // do the pop, with the barrier to prevent out-of-sequence popping. + pop_result.push_back( + stack.pop_reg(submit_regs.at(i), pop_barrier_regs, env, allow_side_effects)); } + // now flip back to the source order for making the final result std::reverse(pop_result.begin(), pop_result.end()); + // final result forms. Will be nullptr if: we didn't try popping OR popping from stack failed. std::vector forms; forms.resize(vars.size(), nullptr); if (!pop_result.empty()) { // success! for (size_t i = 0; i < submit_regs.size(); i++) { + // fill out vars from our submission forms.at(submit_reg_to_var.at(i)) = pop_result.at(i); } } - // fill in the missing pieces: + // write the output for (size_t i = 0; i < forms.size(); i++) { if (forms.at(i)) { + // we got a form. inline these in the result for (auto x : forms.at(i)->elts()) { output.at(i)->push_back(x); } } else { + // we got nothing, just insert the variable name. output.at(i)->push_back(pool.alloc_element( SimpleAtom::make_var(vars.at(i)).as_expr(), vars.at(i).idx())); } } } +/*! + * Pop each variable in the input list into a form. The variables should be given in the order + * they are evaluated in the source. It is safe to put the result of these in the same expression. + * This uses the barrier register approach, but it is only effective if you put all registers + * appearing at the same level. + */ std::vector pop_to_forms(const std::vector& vars, const Env& env, FormPool& pool, FormStack& stack, + bool allow_side_effects, const std::optional& consumes = std::nullopt) { std::vector forms; std::vector> forms_out; @@ -96,7 +179,7 @@ std::vector pop_to_forms(const std::vector& vars, form_ptrs.push_back(&x); } - pop_helper(vars, env, pool, stack, form_ptrs, consumes); + pop_helper(vars, env, pool, stack, form_ptrs, allow_side_effects, consumes); for (auto& x : forms_out) { forms.push_back(pool.alloc_sequence_form(nullptr, x)); @@ -104,52 +187,90 @@ std::vector pop_to_forms(const std::vector& vars, return forms; } +// TODO - if we start using child classes of float/int/uint for things like degrees/meters +// we may need to adjust these. + +/*! + * type == float (exactly)? + */ bool is_float_type(const Env& env, int my_idx, Variable var) { auto type = env.get_types_before_op(my_idx).get(var.reg()).typespec(); return type == TypeSpec("float"); } +/*! + * type == int (exactly)? + */ bool is_int_type(const Env& env, int my_idx, Variable var) { auto type = env.get_types_before_op(my_idx).get(var.reg()).typespec(); return type == TypeSpec("int"); } +/*! + * type == uint (exactly)? + */ bool is_uint_type(const Env& env, int my_idx, Variable var) { auto type = env.get_types_before_op(my_idx).get(var.reg()).typespec(); return type == TypeSpec("uint"); } } // namespace -void Form::update_children_from_stack(const Env& env, FormPool& pool, FormStack& stack) { +/*! + * Update a form to use values from the stack. Won't push to the stack. + * This should be used to update a Form that immediately follows something being pushed. + * Will only change the first element of the form - anything after that will jump sequencing + */ +void Form::update_children_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + bool allow_side_effects) { + assert(!m_elements.empty()); + std::vector new_elts; - for (auto& elt : m_elements) { - elt->update_from_stack(env, pool, stack, &new_elts); + + for (size_t i = 0; i < m_elements.size(); i++) { + if (i == 0) { + // only bother doing the first one. + m_elements[i]->update_from_stack(env, pool, stack, &new_elts, allow_side_effects); + } else { + new_elts.push_back(m_elements[i]); + } } + m_elements = new_elts; } +/*! + * Default update_from_stack for an element if no specific one is provided. + */ void FormElement::update_from_stack(const Env& env, FormPool&, FormStack&, - std::vector*) { + std::vector*, + bool) { throw std::runtime_error(fmt::format("update_from_stack NYI for {}", to_string(env))); } +/*! + * Update a LoadSourceElement from the stack. + */ void LoadSourceElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - m_addr->update_children_from_stack(env, pool, stack); + std::vector* result, + bool allow_side_effects) { + m_addr->update_children_from_stack(env, pool, stack, allow_side_effects); result->push_back(this); } void SimpleExpressionElement::update_from_stack_identity(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { auto& arg = m_expr.get_arg(0); if (arg.is_var()) { - pop_helper({arg.var()}, env, pool, stack, {result}); + pop_helper({arg.var()}, env, pool, stack, {result}, allow_side_effects); } else if (arg.is_static_addr()) { // for now, do nothing. result->push_back(this); @@ -164,14 +285,15 @@ void SimpleExpressionElement::update_from_stack_identity(const Env& env, void SimpleExpressionElement::update_from_stack_gpr_to_fpr(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { auto src = m_expr.get_arg(0); auto src_type = env.get_types_before_op(m_my_idx).get(src.var().reg()); if (src_type.typespec() == TypeSpec("float")) { // set ourself to identity. m_expr = src.as_expr(); // then go again. - update_from_stack(env, pool, stack, result); + update_from_stack(env, pool, stack, result, allow_side_effects); } else { throw std::runtime_error(fmt::format("GPR -> FPR applied to a {}", src_type.print())); } @@ -180,14 +302,15 @@ void SimpleExpressionElement::update_from_stack_gpr_to_fpr(const Env& env, void SimpleExpressionElement::update_from_stack_fpr_to_gpr(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { auto src = m_expr.get_arg(0); auto src_type = env.get_types_before_op(m_my_idx).get(src.var().reg()); if (src_type.typespec() == TypeSpec("float")) { // set ourself to identity. m_expr = src.as_expr(); // then go again. - update_from_stack(env, pool, stack, result); + update_from_stack(env, pool, stack, result, allow_side_effects); } else { throw std::runtime_error(fmt::format("FPR -> GPR applied to a {}", src_type.print())); } @@ -196,12 +319,14 @@ void SimpleExpressionElement::update_from_stack_fpr_to_gpr(const Env& env, void SimpleExpressionElement::update_from_stack_div_s(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { if (is_float_type(env, m_my_idx, m_expr.get_arg(0).var()) && is_float_type(env, m_my_idx, m_expr.get_arg(1).var())) { // todo - check the order here - auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); + auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack, + allow_side_effects); auto new_form = pool.alloc_element( GenericOperator::make_fixed(FixedOperatorKind::DIVISION), args.at(0), args.at(1)); result->push_back(new_form); @@ -213,7 +338,8 @@ void SimpleExpressionElement::update_from_stack_div_s(const Env& env, void SimpleExpressionElement::update_from_stack_add_i(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); auto arg0_u = is_uint_type(env, m_my_idx, m_expr.get_arg(0).var()); @@ -228,9 +354,10 @@ void SimpleExpressionElement::update_from_stack_add_i(const Env& env, std::vector args; if (arg1_reg) { - args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); + args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack, + allow_side_effects); } else { - args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); + args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack, allow_side_effects); args.push_back(pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); } @@ -250,11 +377,13 @@ void SimpleExpressionElement::update_from_stack_add_i(const Env& env, void SimpleExpressionElement::update_from_stack_mult_si(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); - auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); + auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack, + allow_side_effects); if (!arg0_i) { args.at(0) = pool.alloc_single_element_form(nullptr, TypeSpec("int"), args.at(0)); @@ -273,7 +402,8 @@ void SimpleExpressionElement::update_from_stack_force_si_2(const Env& env, FixedOperatorKind kind, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); bool arg1_i = true; bool arg1_reg = m_expr.get_arg(1).is_var(); @@ -285,9 +415,10 @@ void SimpleExpressionElement::update_from_stack_force_si_2(const Env& env, std::vector args; if (arg1_reg) { - args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); + args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack, + allow_side_effects); } else { - args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); + args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack, allow_side_effects); args.push_back(pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); } @@ -308,7 +439,8 @@ void SimpleExpressionElement::update_from_stack_force_ui_2(const Env& env, FixedOperatorKind kind, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { auto arg0_u = is_uint_type(env, m_my_idx, m_expr.get_arg(0).var()); bool arg1_u = true; bool arg1_reg = m_expr.get_arg(1).is_var(); @@ -320,9 +452,10 @@ void SimpleExpressionElement::update_from_stack_force_ui_2(const Env& env, std::vector args; if (arg1_reg) { - args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); + args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack, + allow_side_effects); } else { - args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); + args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack, allow_side_effects); args.push_back(pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); } @@ -339,16 +472,16 @@ void SimpleExpressionElement::update_from_stack_force_ui_2(const Env& env, result->push_back(new_form); } -void SimpleExpressionElement::update_from_stack_copy_first_int_2( - const Env& env, - FixedOperatorKind kind, - FormPool& pool, - FormStack& stack, - std::vector* result) { +void SimpleExpressionElement::update_from_stack_copy_first_int_2(const Env& env, + FixedOperatorKind kind, + FormPool& pool, + FormStack& stack, + std::vector* result, + bool allow_side_effects) { auto arg0_i = is_int_type(env, m_my_idx, m_expr.get_arg(0).var()); auto arg0_u = is_uint_type(env, m_my_idx, m_expr.get_arg(0).var()); if (!m_expr.get_arg(1).is_var()) { - auto args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); + auto args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack, allow_side_effects); auto new_form = pool.alloc_element( GenericOperator::make_fixed(kind), args.at(0), @@ -359,7 +492,8 @@ void SimpleExpressionElement::update_from_stack_copy_first_int_2( auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); auto arg1_u = is_uint_type(env, m_my_idx, m_expr.get_arg(1).var()); - auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack); + auto args = pop_to_forms({m_expr.get_arg(0).var(), m_expr.get_arg(1).var()}, env, pool, stack, + allow_side_effects); if ((arg0_i && arg1_i) || (arg0_u && arg1_u)) { auto new_form = pool.alloc_element(GenericOperator::make_fixed(kind), @@ -377,8 +511,9 @@ void SimpleExpressionElement::update_from_stack_copy_first_int_2( void SimpleExpressionElement::update_from_stack_lognot(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - auto args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack); + std::vector* result, + bool allow_side_effects) { + auto args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack, allow_side_effects); auto new_form = pool.alloc_element( GenericOperator::make_fixed(FixedOperatorKind::LOGNOT), args.at(0)); result->push_back(new_form); @@ -387,67 +522,81 @@ void SimpleExpressionElement::update_from_stack_lognot(const Env& env, void SimpleExpressionElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { switch (m_expr.kind()) { case SimpleExpression::Kind::IDENTITY: - update_from_stack_identity(env, pool, stack, result); + update_from_stack_identity(env, pool, stack, result, allow_side_effects); break; case SimpleExpression::Kind::GPR_TO_FPR: - update_from_stack_gpr_to_fpr(env, pool, stack, result); + update_from_stack_gpr_to_fpr(env, pool, stack, result, allow_side_effects); break; case SimpleExpression::Kind::FPR_TO_GPR: - update_from_stack_fpr_to_gpr(env, pool, stack, result); + update_from_stack_fpr_to_gpr(env, pool, stack, result, allow_side_effects); break; case SimpleExpression::Kind::DIV_S: - update_from_stack_div_s(env, pool, stack, result); + update_from_stack_div_s(env, pool, stack, result, allow_side_effects); break; case SimpleExpression::Kind::ADD: - update_from_stack_add_i(env, pool, stack, result); + update_from_stack_add_i(env, pool, stack, result, allow_side_effects); break; case SimpleExpression::Kind::SUB: - update_from_stack_copy_first_int_2(env, FixedOperatorKind::SUBTRACTION, pool, stack, result); + update_from_stack_copy_first_int_2(env, FixedOperatorKind::SUBTRACTION, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::MUL_SIGNED: - update_from_stack_mult_si(env, pool, stack, result); + update_from_stack_mult_si(env, pool, stack, result, allow_side_effects); break; case SimpleExpression::Kind::DIV_SIGNED: - update_from_stack_force_si_2(env, FixedOperatorKind::DIVISION, pool, stack, result); + update_from_stack_force_si_2(env, FixedOperatorKind::DIVISION, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::MOD_SIGNED: - update_from_stack_force_si_2(env, FixedOperatorKind::MOD, pool, stack, result); + update_from_stack_force_si_2(env, FixedOperatorKind::MOD, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::MIN_SIGNED: - update_from_stack_force_si_2(env, FixedOperatorKind::MIN, pool, stack, result); + update_from_stack_force_si_2(env, FixedOperatorKind::MIN, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::MAX_SIGNED: - update_from_stack_force_si_2(env, FixedOperatorKind::MAX, pool, stack, result); + update_from_stack_force_si_2(env, FixedOperatorKind::MAX, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::AND: - update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGAND, pool, stack, result); + update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGAND, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::OR: - update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGIOR, pool, stack, result); + update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGIOR, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::NOR: - update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGNOR, pool, stack, result); + update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGNOR, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::XOR: - update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGXOR, pool, stack, result); + update_from_stack_copy_first_int_2(env, FixedOperatorKind::LOGXOR, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::LOGNOT: - update_from_stack_lognot(env, pool, stack, result); + update_from_stack_lognot(env, pool, stack, result, allow_side_effects); break; case SimpleExpression::Kind::LEFT_SHIFT: - update_from_stack_force_ui_2(env, FixedOperatorKind::SHL, pool, stack, result); + update_from_stack_force_ui_2(env, FixedOperatorKind::SHL, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::RIGHT_SHIFT_LOGIC: - update_from_stack_force_ui_2(env, FixedOperatorKind::SHR, pool, stack, result); + update_from_stack_force_ui_2(env, FixedOperatorKind::SHR, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::RIGHT_SHIFT_ARITH: - update_from_stack_force_si_2(env, FixedOperatorKind::SAR, pool, stack, result); + update_from_stack_force_si_2(env, FixedOperatorKind::SAR, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::MUL_UNSIGNED: - update_from_stack_force_ui_2(env, FixedOperatorKind::MULTIPLICATION, pool, stack, result); + update_from_stack_force_ui_2(env, FixedOperatorKind::MULTIPLICATION, pool, stack, result, + allow_side_effects); break; default: throw std::runtime_error( @@ -460,7 +609,7 @@ void SimpleExpressionElement::update_from_stack(const Env& env, /////////////////// void SetVarElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { - m_src->update_children_from_stack(env, pool, stack); + m_src->update_children_from_stack(env, pool, stack, true); if (m_src->is_single_element()) { auto src_as_se = dynamic_cast(m_src->back()); if (src_as_se) { @@ -482,15 +631,16 @@ void SetVarElement::push_to_stack(const Env& env, FormPool& pool, FormStack& sta void SetVarElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - m_src->update_children_from_stack(env, pool, stack); + std::vector* result, + bool allow_side_effects) { + m_src->update_children_from_stack(env, pool, stack, allow_side_effects); result->push_back(this); } void SetFormFormElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { // todo - is the order here right? - m_src->update_children_from_stack(env, pool, stack); - m_dst->update_children_from_stack(env, pool, stack); + m_src->update_children_from_stack(env, pool, stack, false); + m_dst->update_children_from_stack(env, pool, stack, false); stack.push_form_element(this, true); } @@ -501,8 +651,9 @@ void SetFormFormElement::push_to_stack(const Env& env, FormPool& pool, FormStack void AshElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - auto forms = pop_to_forms({value, shift_amount}, env, pool, stack, consumed); + std::vector* result, + bool allow_side_effects) { + auto forms = pop_to_forms({value, shift_amount}, env, pool, stack, allow_side_effects, consumed); auto new_form = pool.alloc_element( GenericOperator::make_fixed(FixedOperatorKind::ARITH_SHIFT), forms.at(0), forms.at(1)); result->push_back(new_form); @@ -515,8 +666,9 @@ void AshElement::update_from_stack(const Env& env, void AbsElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - auto forms = pop_to_forms({source}, env, pool, stack, consumed); + std::vector* result, + bool allow_side_effects) { + auto forms = pop_to_forms({source}, env, pool, stack, allow_side_effects, consumed); auto new_form = pool.alloc_element( GenericOperator::make_fixed(FixedOperatorKind::ABS), forms.at(0)); result->push_back(new_form); @@ -529,7 +681,8 @@ void AbsElement::update_from_stack(const Env& env, void FunctionCallElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { std::vector args; auto nargs = m_op->arg_vars().size(); args.resize(nargs, nullptr); @@ -538,7 +691,7 @@ void FunctionCallElement::update_from_stack(const Env& env, for (size_t i = 0; i < nargs; i++) { all_pop_vars.push_back(m_op->arg_vars().at(i)); } - auto unstacked = pop_to_forms(all_pop_vars, env, pool, stack); + auto unstacked = pop_to_forms(all_pop_vars, env, pool, stack, allow_side_effects); std::vector arg_forms; arg_forms.insert(arg_forms.begin(), unstacked.begin() + 1, unstacked.end()); auto new_form = pool.alloc_element( @@ -638,7 +791,7 @@ void FunctionCallElement::update_from_stack(const Env& env, void FunctionCallElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { std::vector rewritten; - update_from_stack(env, pool, stack, &rewritten); + update_from_stack(env, pool, stack, &rewritten, true); for (auto x : rewritten) { stack.push_form_element(x, true); } @@ -650,9 +803,10 @@ void FunctionCallElement::push_to_stack(const Env& env, FormPool& pool, FormStac void DerefElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { // todo - update var tokens from stack? - m_base->update_children_from_stack(env, pool, stack); + m_base->update_children_from_stack(env, pool, stack, allow_side_effects); auto as_deref = dynamic_cast(m_base->try_as_single_element()); if (as_deref) { if (!m_is_addr_of && !as_deref->is_addr_of()) { @@ -867,7 +1021,8 @@ void ShortCircuitElement::push_to_stack(const Env& env, FormPool& pool, FormStac void ShortCircuitElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool) { (void)stack; for (int i = 0; i < int(entries.size()); i++) { auto& entry = entries.at(i); @@ -902,9 +1057,14 @@ void ConditionElement::push_to_stack(const Env& env, FormPool& pool, FormStack& for (int i = 0; i < get_condition_num_args(m_kind); i++) { vars.push_back(m_src[i]->var()); } - std::reverse(vars.begin(), vars.end()); - source_forms = pop_to_forms(vars, env, pool, stack, m_consumed); - std::reverse(source_forms.begin(), source_forms.end()); + if (m_flipped) { + std::reverse(vars.begin(), vars.end()); + } + + source_forms = pop_to_forms(vars, env, pool, stack, true, m_consumed); + if (m_flipped) { + std::reverse(source_forms.begin(), source_forms.end()); + } stack.push_form_element( pool.alloc_element(GenericOperator::make_compare(m_kind), source_forms), @@ -914,14 +1074,15 @@ void ConditionElement::push_to_stack(const Env& env, FormPool& pool, FormStack& void ConditionElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { + std::vector* result, + bool allow_side_effects) { std::vector source_forms; std::vector vars; for (int i = 0; i < get_condition_num_args(m_kind); i++) { vars.push_back(m_src[i]->var()); } - source_forms = pop_to_forms(vars, env, pool, stack, m_consumed); + source_forms = pop_to_forms(vars, env, pool, stack, allow_side_effects, m_consumed); result->push_back( pool.alloc_element(GenericOperator::make_compare(m_kind), source_forms)); @@ -969,13 +1130,14 @@ void AtomicOpElement::push_to_stack(const Env& env, FormPool&, FormStack& stack) void GenericElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - if (m_head.m_kind == GenericOperator::Kind::FUNCTION_EXPR) { - m_head.m_function->update_children_from_stack(env, pool, stack); + std::vector* result, + bool) { + for (auto it = m_elts.rbegin(); it != m_elts.rend(); it++) { + (*it)->update_children_from_stack(env, pool, stack, false); } - for (auto& x : m_elts) { - x->update_children_from_stack(env, pool, stack); + if (m_head.m_kind == GenericOperator::Kind::FUNCTION_EXPR) { + m_head.m_function->update_children_from_stack(env, pool, stack, false); } result->push_back(this); } @@ -993,8 +1155,9 @@ void GenericElement::push_to_stack(const Env& env, FormPool& pool, FormStack& st void DynamicMethodAccess::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - auto new_val = stack.pop_reg(m_source, {}, env); + std::vector* result, + bool allow_side_effects) { + auto new_val = stack.pop_reg(m_source, {}, env, allow_side_effects); auto reg0_matcher = Matcher::match_or({Matcher::any_reg(0), Matcher::cast("uint", Matcher::any_reg(0))}); auto reg1_matcher = @@ -1041,8 +1204,9 @@ bool is_power_of_two(int in, int* out) { void ArrayFieldAccess::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - auto new_val = stack.pop_reg(m_source, {}, env); + std::vector* result, + bool allow_side_effects) { + auto new_val = stack.pop_reg(m_source, {}, env, allow_side_effects); int power_of_two = 0; if (m_constant_offset == 0) { @@ -1140,8 +1304,9 @@ void ArrayFieldAccess::update_from_stack(const Env& env, void CastElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - m_source->update_children_from_stack(env, pool, stack); + std::vector* result, + bool allow_side_effects) { + m_source->update_children_from_stack(env, pool, stack, allow_side_effects); result->push_back(this); } @@ -1152,8 +1317,9 @@ void CastElement::update_from_stack(const Env& env, void TypeOfElement::update_from_stack(const Env& env, FormPool& pool, FormStack& stack, - std::vector* result) { - value->update_children_from_stack(env, pool, stack); + std::vector* result, + bool allow_side_effects) { + value->update_children_from_stack(env, pool, stack, allow_side_effects); result->push_back(this); } diff --git a/decompiler/IR2/FormStack.cpp b/decompiler/IR2/FormStack.cpp index 5328d2a2f6..449c45ed0a 100644 --- a/decompiler/IR2/FormStack.cpp +++ b/decompiler/IR2/FormStack.cpp @@ -64,8 +64,11 @@ void FormStack::push_form_element(FormElement* elt, bool sequence_point) { m_stack.push_back(entry); } -Form* FormStack::pop_reg(const Variable& var, const RegSet& barrier, const Env& env) { - return pop_reg(var.reg(), barrier, env); +Form* FormStack::pop_reg(const Variable& var, + const RegSet& barrier, + const Env& env, + bool allow_side_effects) { + return pop_reg(var.reg(), barrier, env, allow_side_effects); } namespace { @@ -77,7 +80,10 @@ bool nonempty_intersection(const RegSet& a, const RegSet& b) { } } // namespace -Form* FormStack::pop_reg(Register reg, const RegSet& barrier, const Env& env) { +Form* FormStack::pop_reg(Register reg, + const RegSet& barrier, + const Env& env, + bool allow_side_effects) { (void)env; // keep this for easy debugging. RegSet modified; for (size_t i = m_stack.size(); i-- > 0;) { @@ -85,14 +91,19 @@ Form* FormStack::pop_reg(Register reg, const RegSet& barrier, const Env& env) { if (entry.active) { if (entry.destination->reg() == reg) { entry.source->get_modified_regs(modified); + if (!allow_side_effects && entry.source->has_side_effects()) { + // the source of the set! has a side effect and that's not allowed, so abort. + return nullptr; + } if (nonempty_intersection(modified, barrier)) { + // violating the barrier registers. return nullptr; } entry.active = false; assert(entry.source); if (entry.non_seq_source.has_value()) { assert(entry.sequence_point == false); - auto result = pop_reg(entry.non_seq_source->reg(), barrier, env); + auto result = pop_reg(entry.non_seq_source->reg(), barrier, env, allow_side_effects); if (result) { return result; } @@ -108,9 +119,17 @@ Form* FormStack::pop_reg(Register reg, const RegSet& barrier, const Env& env) { if (entry.source) { assert(!entry.elt); entry.source->get_modified_regs(modified); + if (!allow_side_effects) { + // shouldn't allow skipping past a set! (may be too conservative?) + return nullptr; + } } else { assert(entry.elt); entry.elt->get_modified_regs(modified); + if (!allow_side_effects && entry.elt->has_side_effects()) { + // shouldn't allow skipping past something with a set! (also may be too conservative?) + return nullptr; + } } } } diff --git a/decompiler/IR2/FormStack.h b/decompiler/IR2/FormStack.h index 7bb6e98e55..17c0759b42 100644 --- a/decompiler/IR2/FormStack.h +++ b/decompiler/IR2/FormStack.h @@ -16,8 +16,11 @@ class FormStack { void push_value_to_reg(Variable var, Form* value, bool sequence_point); void push_non_seq_reg_to_reg(const Variable& dst, const Variable& src, Form* src_as_form); void push_form_element(FormElement* elt, bool sequence_point); - Form* pop_reg(const Variable& var, const RegSet& barrier, const Env& env); - Form* pop_reg(Register reg, const RegSet& barrier, const Env& env); + Form* pop_reg(const Variable& var, + const RegSet& barrier, + const Env& env, + bool allow_side_effects); + Form* pop_reg(Register reg, const RegSet& barrier, const Env& env, bool allow_side_effects); bool is_single_expression(); std::vector rewrite(FormPool& pool); std::vector rewrite_to_get_var(FormPool& pool, const Variable& var, const Env& env); diff --git a/decompiler/analysis/atomic_op_builder.cpp b/decompiler/analysis/atomic_op_builder.cpp index 93d3abcd72..3380ff4c1f 100644 --- a/decompiler/analysis/atomic_op_builder.cpp +++ b/decompiler/analysis/atomic_op_builder.cpp @@ -779,6 +779,7 @@ std::unique_ptr convert_bne_2(const Instruction& i0, } else { condition = IR2_Condition(IR2_Condition::Kind::NOT_EQUAL, make_src_atom(s0, idx), make_src_atom(s1, idx)); + condition.make_flipped(); } return make_branch(condition, i1, likely, dest, idx); } @@ -809,6 +810,7 @@ std::unique_ptr convert_beq_2(const Instruction& i0, } else { condition = IR2_Condition(IR2_Condition::Kind::EQUAL, make_src_atom(s0, idx), make_src_atom(s1, idx)); + condition.make_flipped(); } return make_branch(condition, i1, likely, dest, idx); } diff --git a/decompiler/analysis/cfg_builder.cpp b/decompiler/analysis/cfg_builder.cpp index d19c10b57f..bf8a50288a 100644 --- a/decompiler/analysis/cfg_builder.cpp +++ b/decompiler/analysis/cfg_builder.cpp @@ -1065,7 +1065,7 @@ Form* cfg_to_ir(FormPool& pool, const Function& f, const CfgVtx* vtx) { } else if (dynamic_cast(vtx)) { auto wvtx = dynamic_cast(vtx); auto condition = pool.alloc_single_element_form( - nullptr, IR2_Condition::Kind::ALWAYS, std::nullopt, std::nullopt, RegSet()); + nullptr, IR2_Condition::Kind::ALWAYS, std::nullopt, std::nullopt, RegSet(), false); auto result = pool.alloc_single_element_form(nullptr, condition, cfg_to_ir(pool, f, wvtx->block)); clean_up_infinite_while_loop(pool, From c255a2dab44d1061cf3cf3a6210e861fc5803354 Mon Sep 17 00:00:00 2001 From: water Date: Wed, 3 Feb 2021 22:51:20 -0500 Subject: [PATCH 07/10] wip --- decompiler/IR2/Form.cpp | 13 +++++++++++++ decompiler/IR2/FormExpressionAnalysis.cpp | 7 +++++++ decompiler/IR2/IR2_common.h | 6 ++++++ 3 files changed, 26 insertions(+) diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index c2e3663dd8..3420ccb21d 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -1084,6 +1084,19 @@ std::string fixed_operator_to_string(FixedOperatorKind kind) { return "object-new"; case FixedOperatorKind::TYPE_NEW: return "type-new"; + + case FixedOperatorKind::LT: + return "<"; + case FixedOperatorKind::GT: + return ">"; + case FixedOperatorKind::LEQ: + return "<="; + case FixedOperatorKind::GEQ: + return ">="; + case FixedOperatorKind::EQ: + return "="; + case FixedOperatorKind::NEQ: + return "!="; default: assert(false); } diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index 1f54bc7414..b07d171182 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -1082,7 +1082,14 @@ void ConditionElement::update_from_stack(const Env& env, for (int i = 0; i < get_condition_num_args(m_kind); i++) { vars.push_back(m_src[i]->var()); } + + if (m_flipped) { + std::reverse(vars.begin(), vars.end()); + } source_forms = pop_to_forms(vars, env, pool, stack, allow_side_effects, m_consumed); + if (m_flipped) { + std::reverse(source_forms.begin(), source_forms.end()); + } result->push_back( pool.alloc_element(GenericOperator::make_compare(m_kind), source_forms)); diff --git a/decompiler/IR2/IR2_common.h b/decompiler/IR2/IR2_common.h index 173b713d6e..ccf6df3e82 100644 --- a/decompiler/IR2/IR2_common.h +++ b/decompiler/IR2/IR2_common.h @@ -113,6 +113,12 @@ enum class FixedOperatorKind { NEW, OBJECT_NEW, TYPE_NEW, + LT, + GT, + LEQ, + GEQ, + EQ, + NEQ, INVALID }; From 801a54c9cd42adf54b00737dc93cf9da4053e85f Mon Sep 17 00:00:00 2001 From: water Date: Fri, 5 Feb 2021 17:07:40 -0500 Subject: [PATCH 08/10] fix conditions --- decompiler/IR2/Form.h | 5 + decompiler/IR2/FormExpressionAnalysis.cpp | 129 ++++++++++++++++-- test/decompiler/test_FormExpressionBuild.cpp | 16 +-- .../test_FormExpressionBuildLong.cpp | 81 ++++++----- test/goalc/test_with_game.cpp | 2 +- 5 files changed, 176 insertions(+), 57 deletions(-) diff --git a/decompiler/IR2/Form.h b/decompiler/IR2/Form.h index 4906bf1268..067ddb029f 100644 --- a/decompiler/IR2/Form.h +++ b/decompiler/IR2/Form.h @@ -289,6 +289,11 @@ class ConditionElement : public FormElement { void invert(); const RegSet& consume() const { return m_consumed; } + FormElement* make_generic(const Env& env, + FormPool& pool, + const std::vector& source_forms, + const std::vector& types); + private: IR2_Condition::Kind m_kind; std::optional m_src[2]; diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index 6baac63110..2b7f131f17 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -483,10 +483,19 @@ void SimpleExpressionElement::update_from_stack_copy_first_int_2(const Env& env, if (!m_expr.get_arg(1).is_var()) { auto args = pop_to_forms({m_expr.get_arg(0).var()}, env, pool, stack, allow_side_effects); - auto new_form = pool.alloc_element( - GenericOperator::make_fixed(kind), args.at(0), - pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); - result->push_back(new_form); + if (!arg0_i && !arg0_u) { + auto new_form = pool.alloc_element( + GenericOperator::make_fixed(kind), + pool.alloc_single_element_form(nullptr, TypeSpec("int"), args.at(0)), + pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); + result->push_back(new_form); + } else { + auto new_form = pool.alloc_element( + GenericOperator::make_fixed(kind), args.at(0), + pool.alloc_single_element_form(nullptr, m_expr.get_arg(1))); + result->push_back(new_form); + } + return; } auto arg1_i = is_int_type(env, m_my_idx, m_expr.get_arg(1).var()); @@ -583,8 +592,8 @@ void SimpleExpressionElement::update_from_stack(const Env& env, update_from_stack_lognot(env, pool, stack, result, allow_side_effects); break; case SimpleExpression::Kind::LEFT_SHIFT: - update_from_stack_force_ui_2(env, FixedOperatorKind::SHL, pool, stack, result, - allow_side_effects); + update_from_stack_copy_first_int_2(env, FixedOperatorKind::SHL, pool, stack, result, + allow_side_effects); break; case SimpleExpression::Kind::RIGHT_SHIFT_LOGIC: update_from_stack_force_ui_2(env, FixedOperatorKind::SHR, pool, stack, result, @@ -1050,12 +1059,106 @@ void ShortCircuitElement::update_from_stack(const Env& env, // ConditionElement /////////////////// +namespace { +Form* make_cast(Form* in, const TypeSpec& in_type, const TypeSpec& out_type, FormPool& pool) { + if (in_type == out_type) { + return in; + } + return pool.alloc_single_element_form(nullptr, out_type, in); +} + +std::vector make_cast(const std::vector& in, + const std::vector& in_types, + const TypeSpec& out_type, + FormPool& pool) { + std::vector out; + assert(in.size() == in_types.size()); + for (size_t i = 0; i < in_types.size(); i++) { + out.push_back(make_cast(in.at(i), in_types.at(i), out_type, pool)); + } + return out; +} +} // namespace + +FormElement* ConditionElement::make_generic(const Env& env, + FormPool& pool, + const std::vector& source_forms, + const std::vector& types) { + switch (m_kind) { + case IR2_Condition::Kind::TRUTHY: + case IR2_Condition::Kind::ZERO: + case IR2_Condition::Kind::NONZERO: + case IR2_Condition::Kind::FALSE: + case IR2_Condition::Kind::IS_PAIR: + case IR2_Condition::Kind::IS_NOT_PAIR: + // kind of a hack, we fall back to the old condition operator which is special cased + // to print the truthy condition in a nice way. and we use it for other things that don't + // require fancy renaming. + return pool.alloc_element(GenericOperator::make_compare(m_kind), + source_forms); + case IR2_Condition::Kind::EQUAL: + return pool.alloc_element(GenericOperator::make_fixed(FixedOperatorKind::EQ), + source_forms); + case IR2_Condition::Kind::NOT_EQUAL: + return pool.alloc_element(GenericOperator::make_fixed(FixedOperatorKind::NEQ), + source_forms); + + case IR2_Condition::Kind::LESS_THAN_SIGNED: + return pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::LT), + make_cast(source_forms, types, TypeSpec("int"), pool)); + case IR2_Condition::Kind::LESS_THAN_UNSIGNED: + return pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::LT), + make_cast(source_forms, types, TypeSpec("uint"), pool)); + + case IR2_Condition::Kind::GEQ_UNSIGNED: + return pool.alloc_element( + GenericOperator::make_fixed(FixedOperatorKind::GEQ), + make_cast(source_forms, types, TypeSpec("uint"), pool)); + + case IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED: { + auto casted = make_cast(source_forms, types, TypeSpec("int"), pool); + auto zero = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_int_constant(0)); + casted.push_back(zero); + return pool.alloc_element(GenericOperator::make_fixed(FixedOperatorKind::LT), + casted); + } + + case IR2_Condition::Kind::GEQ_ZERO_SIGNED: { + auto casted = make_cast(source_forms, types, TypeSpec("int"), pool); + auto zero = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_int_constant(0)); + casted.push_back(zero); + return pool.alloc_element(GenericOperator::make_fixed(FixedOperatorKind::GEQ), + casted); + } + + case IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED: { + auto casted = make_cast(source_forms, types, TypeSpec("int"), pool); + auto zero = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_int_constant(0)); + casted.push_back(zero); + return pool.alloc_element(GenericOperator::make_fixed(FixedOperatorKind::GT), + casted); + } + + default: + throw std::runtime_error("ConditionElement::make_generic NYI for kind " + + get_condition_kind_name(m_kind)); + } +} + void ConditionElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { std::vector source_forms; + std::vector source_types; std::vector vars; for (int i = 0; i < get_condition_num_args(m_kind); i++) { - vars.push_back(m_src[i]->var()); + auto& var = m_src[i]->var(); + vars.push_back(var); + source_types.push_back(env.get_types_before_op(var.idx()).get(var.reg()).typespec()); } if (m_flipped) { std::reverse(vars.begin(), vars.end()); @@ -1066,9 +1169,7 @@ void ConditionElement::push_to_stack(const Env& env, FormPool& pool, FormStack& std::reverse(source_forms.begin(), source_forms.end()); } - stack.push_form_element( - pool.alloc_element(GenericOperator::make_compare(m_kind), source_forms), - true); + stack.push_form_element(make_generic(env, pool, source_forms, source_types), true); } void ConditionElement::update_from_stack(const Env& env, @@ -1077,10 +1178,13 @@ void ConditionElement::update_from_stack(const Env& env, std::vector* result, bool allow_side_effects) { std::vector source_forms; + std::vector source_types; std::vector vars; for (int i = 0; i < get_condition_num_args(m_kind); i++) { - vars.push_back(m_src[i]->var()); + auto& var = m_src[i]->var(); + vars.push_back(var); + source_types.push_back(env.get_types_before_op(var.idx()).get(var.reg()).typespec()); } if (m_flipped) { @@ -1091,8 +1195,7 @@ void ConditionElement::update_from_stack(const Env& env, std::reverse(source_forms.begin(), source_forms.end()); } - result->push_back( - pool.alloc_element(GenericOperator::make_compare(m_kind), source_forms)); + result->push_back(make_generic(env, pool, source_forms, source_types)); } void ReturnElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { diff --git a/test/decompiler/test_FormExpressionBuild.cpp b/test/decompiler/test_FormExpressionBuild.cpp index f68b9ff736..7e62814ee6 100644 --- a/test/decompiler/test_FormExpressionBuild.cpp +++ b/test/decompiler/test_FormExpressionBuild.cpp @@ -662,7 +662,7 @@ TEST_F(FormRegressionTest, ExprRef) { "(begin\n" " (set! v1-0 0)\n" " (while\n" - " (<.si v1-0 a1-0)\n" + " (< v1-0 a1-0)\n" " (nop!)\n" " (nop!)\n" " (set! a0-0 (cdr a0-0))\n" @@ -730,7 +730,7 @@ TEST_F(FormRegressionTest, ExprPairMethod4) { " (set! v0-0 1)\n" " (while\n" " (and (!= v1-1 '()) " - " (<0.si (shl (the-as uint v1-1) 62))\n" + " (< (shl (the-as int v1-1) 62) 0)\n" " )\n" " (set! v0-0 (+ v0-0 1))\n" " (set! v1-1 (cdr v1-1))\n" @@ -1594,13 +1594,13 @@ TEST_F(FormRegressionTest, ExprSort) { " (set! s3-0 gp-0)\n" " (while\n" " (not\n" - " (or (= (cdr s3-0) (quote ())) (>=0.si (shl (the-as uint (cdr s3-0)) 62)))\n" + " (or (= (cdr s3-0) (quote ())) (>= (shl (the-as int (cdr s3-0)) 62) 0))\n" " )\n" " (set! s2-0 (car s3-0))\n" " (set! s1-0 (car (cdr s3-0)))\n" " (set! v1-1 (s5-0 s2-0 s1-0))\n" " (when\n" - " (and (or (not v1-1) (>0.si v1-1)) (!= v1-2 (quote #t)))\n" + " (and (or (not v1-1) (> (the-as int v1-1) 0)) (!= v1-2 (quote #t)))\n" " (set! s4-0 (+ s4-0 1))\n" " (set! (car s3-0) s1-0)\n" " (set! (car (cdr s3-0)) s2-0)\n" @@ -1897,7 +1897,7 @@ TEST_F(FormRegressionTest, ExprMemCopy) { " (set! v0-0 a0-0)\n" " (set! v1-0 0)\n" " (while\n" - " (<.si v1-0 a2-0)\n" + " (< v1-0 a2-0)\n" " (set! (-> (the-as (pointer int8) a0-0)) (-> (the-as (pointer uint8) a1-0)))\n" " (set! a0-0 (+ a0-0 (the-as uint 1)))\n" " (set! a1-0 (+ a1-0 (the-as uint 1)))\n" @@ -1940,7 +1940,7 @@ TEST_F(FormRegressionTest, ExprMemSet32) { " (set! v0-0 a0-0)\n" " (set! v1-0 0)\n" " (while\n" - " (<.si v1-0 a1-0)\n" + " (< v1-0 a1-0)\n" " (set! (-> (the-as (pointer int32) a0-0)) a2-0)\n" " (set! a0-0 (+ a0-0 (the-as uint 4)))\n" " (nop!)\n" @@ -1986,7 +1986,7 @@ TEST_F(FormRegressionTest, ExprMemOr) { " (set! v0-0 a0-0)\n" " (set! v1-0 0)\n" " (while\n" - " (<.si v1-0 a2-0)\n" + " (< v1-0 a2-0)\n" " (set!\n" " (-> (the-as (pointer int8) a0-0))\n" " (logior\n" @@ -2212,7 +2212,7 @@ TEST_F(FormRegressionTest, ExprPrintTreeBitmask) { " (set! s5-0 a1-0)\n" " (set! s4-0 0)\n" " (while\n" - " (<.si s4-0 s5-0)\n" + " (< s4-0 s5-0)\n" " (if\n" " (zero? (logand gp-0 1))\n" " (format (quote #t) L323)\n" diff --git a/test/decompiler/test_FormExpressionBuildLong.cpp b/test/decompiler/test_FormExpressionBuildLong.cpp index 3618eea4d5..4a7b237a88 100644 --- a/test/decompiler/test_FormExpressionBuildLong.cpp +++ b/test/decompiler/test_FormExpressionBuildLong.cpp @@ -547,7 +547,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " ((= v1-1 (quote int32))\n" " (set! s5-0 0)\n" " (while\n" - " (<.si s5-0 (-> gp-0 length))\n" + " (< s5-0 (-> gp-0 length))\n" " (format\n" " (quote #t)\n" " (if (zero? s5-0) L341 L340)\n" @@ -561,7 +561,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " ((= v1-1 (quote uint32))\n" " (set! s5-1 0)\n" " (while\n" - " (<.si s5-1 (-> gp-0 length))\n" + " (< s5-1 (-> gp-0 length))\n" " (format\n" " (quote #t)\n" " (if (zero? s5-1) L341 L340)\n" @@ -575,7 +575,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " ((= v1-1 (quote int64))\n" " (set! s5-2 0)\n" " (while\n" - " (<.si s5-2 (-> gp-0 length))\n" + " (< s5-2 (-> gp-0 length))\n" " (format\n" " (quote #t)\n" " (if (zero? s5-2) L341 L340)\n" @@ -589,7 +589,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " ((= v1-1 (quote uint64))\n" " (set! s5-3 0)\n" " (while\n" - " (<.si s5-3 (-> gp-0 length))\n" + " (< s5-3 (-> gp-0 length))\n" " (format\n" " (quote #t)\n" " (if (zero? s5-3) L339 L338)\n" @@ -603,7 +603,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " ((= v1-1 (quote int8))\n" " (set! s5-4 0)\n" " (while\n" - " (<.si s5-4 (-> gp-0 length))\n" + " (< s5-4 (-> gp-0 length))\n" " (format\n" " (quote #t)\n" " (if (zero? s5-4) L341 L340)\n" @@ -617,7 +617,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " ((= v1-1 (quote uint8))\n" " (set! s5-5 0)\n" " (while\n" - " (<.si s5-5 (-> gp-0 length))\n" + " (< s5-5 (-> gp-0 length))\n" " (format\n" " (quote #t)\n" " (if (zero? s5-5) L341 L340)\n" @@ -631,7 +631,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " ((= v1-1 (quote int16))\n" " (set! s5-6 0)\n" " (while\n" - " (<.si s5-6 (-> gp-0 length))\n" + " (< s5-6 (-> gp-0 length))\n" " (format\n" " (quote #t)\n" " (if (zero? s5-6) L341 L340)\n" @@ -645,7 +645,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " ((= v1-1 (quote uint16))\n" " (set! s5-7 0)\n" " (while\n" - " (<.si s5-7 (-> gp-0 length))\n" + " (< s5-7 (-> gp-0 length))\n" " (format\n" " (quote #t)\n" " (if (zero? s5-7) L341 L340)\n" @@ -662,14 +662,14 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " (v1-40\n" " (set! s5-8 0)\n" " (while\n" - " (<.si s5-8 (-> gp-0 length))\n" + " (< s5-8 (-> gp-0 length))\n" " (set! t9-10 format)\n" " (set! a0-21 (quote #t))\n" " (set! a1-11 (if (zero? s5-8) L339 L338))\n" " (set!\n" " v1-42\n" " (+\n" - " (shl (the-as uint s5-8) 4)\n" + " (shl s5-8 4)\n" " (the-as int (the-as (array uint128) gp-0))\n" " )\n" " )\n" @@ -683,7 +683,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " (else\n" " (set! s5-9 0)\n" " (while\n" - " (<.si s5-9 (-> gp-0 length))\n" + " (< s5-9 (-> gp-0 length))\n" " (format\n" " (quote #t)\n" " (if (zero? s5-9) L341 L340)\n" @@ -704,7 +704,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " ((= (-> gp-0 content-type) float)\n" " (set! s5-10 0)\n" " (while\n" - " (<.si s5-10 (-> gp-0 length))\n" + " (< s5-10 (-> gp-0 length))\n" " (if\n" " (zero? s5-10)\n" " (format (quote #t) L343 (-> (the-as (array float) gp-0) s5-10))\n" @@ -718,7 +718,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod2) { " (else\n" " (set! s5-11 0)\n" " (while\n" - " (<.si s5-11 (-> gp-0 length))\n" + " (< s5-11 (-> gp-0 length))\n" " (if\n" " (zero? s5-11)\n" " (format (quote #t) L336 (-> (the-as (array basic) gp-0) s5-11))\n" @@ -1231,7 +1231,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " ((= v1-1 (quote int32))\n" " (set! s5-0 0)\n" " (while\n" - " (<.si s5-0 (-> gp-0 length))\n" + " (< s5-0 (-> gp-0 length))\n" " (format (quote #t) L328 s5-0 (-> (the-as (array int32) gp-0) s5-0))\n" " (set! s5-0 (+ s5-0 1))\n" " )\n" @@ -1241,7 +1241,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " ((= v1-1 (quote uint32))\n" " (set! s5-1 0)\n" " (while\n" - " (<.si s5-1 (-> gp-0 length))\n" + " (< s5-1 (-> gp-0 length))\n" " (format (quote #t) L328 s5-1 (-> (the-as (array uint32) gp-0) s5-1))\n" " (set! s5-1 (+ s5-1 1))\n" " )\n" @@ -1251,7 +1251,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " ((= v1-1 (quote int64))\n" " (set! s5-2 0)\n" " (while\n" - " (<.si s5-2 (-> gp-0 length))\n" + " (< s5-2 (-> gp-0 length))\n" " (format (quote #t) L328 s5-2 (-> (the-as (array int64) gp-0) s5-2))\n" " (set! s5-2 (+ s5-2 1))\n" " )\n" @@ -1261,7 +1261,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " ((= v1-1 (quote uint64))\n" " (set! s5-3 0)\n" " (while\n" - " (<.si s5-3 (-> gp-0 length))\n" + " (< s5-3 (-> gp-0 length))\n" " (format (quote #t) L327 s5-3 (-> (the-as (array uint64) gp-0) s5-3))\n" " (set! s5-3 (+ s5-3 1))\n" " )\n" @@ -1271,7 +1271,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " ((= v1-1 (quote int8))\n" " (set! s5-4 0)\n" " (while\n" - " (<.si s5-4 (-> gp-0 length))\n" + " (< s5-4 (-> gp-0 length))\n" " (format (quote #t) L328 s5-4 (-> (the-as (array int8) gp-0) s5-4))\n" " (set! s5-4 (+ s5-4 1))\n" " )\n" @@ -1281,7 +1281,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " ((= v1-1 (quote uint8))\n" " (set! s5-5 0)\n" " (while\n" - " (<.si s5-5 (-> gp-0 length))\n" + " (< s5-5 (-> gp-0 length))\n" " (format (quote #t) L328 s5-5 (-> (the-as (array int8) gp-0) s5-5))\n" " (set! s5-5 (+ s5-5 1))\n" " )\n" @@ -1291,7 +1291,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " ((= v1-1 (quote int16))\n" " (set! s5-6 0)\n" " (while\n" - " (<.si s5-6 (-> gp-0 length))\n" + " (< s5-6 (-> gp-0 length))\n" " (format (quote #t) L328 s5-6 (-> (the-as (array int16) gp-0) s5-6))\n" " (set! s5-6 (+ s5-6 1))\n" " )\n" @@ -1301,7 +1301,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " ((= v1-1 (quote uint16))\n" " (set! s5-7 0)\n" " (while\n" - " (<.si s5-7 (-> gp-0 length))\n" + " (< s5-7 (-> gp-0 length))\n" " (format (quote #t) L328 s5-7 (-> (the-as (array uint16) gp-0) s5-7))\n" " (set! s5-7 (+ s5-7 1))\n" " )\n" @@ -1314,7 +1314,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " (v1-40\n" " (set! s5-8 0)\n" " (while\n" - " (<.si s5-8 (-> gp-0 length))\n" + " (< s5-8 (-> gp-0 length))\n" " (set! t9-14 format)\n" " (set! a0-25 (quote #t))\n" " (set! a1-15 L327)\n" @@ -1322,7 +1322,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " (set!\n" " v1-42\n" " (+\n" - " (shl (the-as uint s5-8) 4)\n" + " (shl s5-8 4)\n" " (the-as int (the-as (array uint128) gp-0))\n" " )\n" " )\n" @@ -1336,7 +1336,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " (else\n" " (set! s5-9 0)\n" " (while\n" - " (<.si s5-9 (-> gp-0 length))\n" + " (< s5-9 (-> gp-0 length))\n" " (format (quote #t) L328 s5-9 (-> gp-0 s5-9))\n" " (set! s5-9 (+ s5-9 1))\n" " )\n" @@ -1353,7 +1353,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " ((= (-> gp-0 content-type) float)\n" " (set! s5-10 0)\n" " (while\n" - " (<.si s5-10 (-> gp-0 length))\n" + " (< s5-10 (-> gp-0 length))\n" " (format (quote #t) L326 s5-10 (-> (the-as (array float) gp-0) s5-10))\n" " (set! s5-10 (+ s5-10 1))\n" " )\n" @@ -1363,7 +1363,7 @@ TEST_F(FormRegressionTest, ExprArrayMethod3) { " (else\n" " (set! s5-11 0)\n" " (while\n" - " (<.si s5-11 (-> gp-0 length))\n" + " (< s5-11 (-> gp-0 length))\n" " (format (quote #t) L325 s5-11 (-> (the-as (array basic) gp-0) s5-11))\n" " (set! s5-11 (+ s5-11 1))\n" " )\n" @@ -1932,13 +1932,16 @@ TEST_F(FormRegressionTest, ExprValid) { " (set! s3-0 a1-0)\n" " (set! s4-0 a2-0)\n" " (set! s5-0 t0-0)\n" - " (and (>=.ui gp-0 __START-OF-TABLE__) (begin (<.ui gp-0 134217728) v1-1))\n" + " (and\n" + " (>= (the-as uint gp-0) (the-as uint __START-OF-TABLE__))\n" + " (begin (< (the-as uint gp-0) (the-as uint 134217728)) v1-1)\n" + " )\n" " )\n" " )\n" " (cond\n" " ((not s3-0)\n" " (cond\n" - " ((nonzero? (logand gp-0 3))\n" + " ((nonzero? (logand (the-as int gp-0) 3))\n" " (if s4-0 (set! v1-4 (format s5-0 L321 gp-0 s4-0)))\n" " (quote #f)\n" " )\n" @@ -1951,13 +1954,17 @@ TEST_F(FormRegressionTest, ExprValid) { " (cond\n" " ((= s3-0 structure)\n" " (cond\n" - " ((nonzero? (logand gp-0 15))\n" + " ((nonzero? (logand (the-as int gp-0) 15))\n" " (if s4-0 (set! v1-8 (format s5-0 L319 gp-0 s4-0 s3-0)))\n" " (quote #f)\n" " )\n" " ((or\n" " (not v1-1)\n" - " (begin (set! v1-10 32768) (.daddu v1-11 v1-10 s7-0) (<.ui gp-0 v1-11))\n" + " (begin\n" + " (set! v1-10 32768)\n" + " (.daddu v1-11 v1-10 s7-0)\n" + " (< (the-as uint gp-0) (the-as uint v1-11))\n" + " )\n" " )\n" " (if s4-0 (set! v1-13 (format s5-0 L318 gp-0 s4-0 s3-0)))\n" " (quote #f)\n" @@ -1967,7 +1974,7 @@ TEST_F(FormRegressionTest, ExprValid) { " )\n" " ((= s3-0 pair)\n" " (cond\n" - " ((!= (logand gp-0 7) 2)\n" + " ((!= (logand (the-as int gp-0) 7) 2)\n" " (if s4-0 (set! v1-15 (format s5-0 L319 gp-0 s4-0 s3-0)))\n" " (quote #f)\n" " )\n" @@ -1980,14 +1987,14 @@ TEST_F(FormRegressionTest, ExprValid) { " )\n" " ((= s3-0 binteger)\n" " (cond\n" - " ((zero? (logand gp-0 7)) (quote #t))\n" + " ((zero? (logand (the-as int gp-0) 7)) (quote #t))\n" " (else\n" " (if s4-0 (set! v1-20 (format s5-0 L319 gp-0 s4-0 s3-0)))\n" " (quote #f)\n" " )\n" " )\n" " )\n" - " ((!= (logand gp-0 7) 4)\n" + " ((!= (logand (the-as int gp-0) 7) 4)\n" " (if s4-0 (set! v1-22 (format s5-0 L319 gp-0 s4-0 s3-0)))\n" " (quote #f)\n" " )\n" @@ -2020,14 +2027,18 @@ TEST_F(FormRegressionTest, ExprValid) { " (set! v1-43 32768)\n" " (.daddu v1-44 v1-43 s7-0)\n" " (cond\n" - " ((>=.ui gp-0 v1-44)\n" + " ((>= (the-as uint gp-0) (the-as uint v1-44))\n" " (if s4-0 (set! v1-46 (format s5-0 L315 gp-0 s4-0 s3-0)))\n" " (quote #f)\n" " )\n" " (else (quote #t))\n" " )\n" " )\n" - " ((begin (set! v1-47 32768) (.daddu v1-48 v1-47 s7-0) (<.ui gp-0 v1-48))\n" + " ((begin\n" + " (set! v1-47 32768)\n" + " (.daddu v1-48 v1-47 s7-0)\n" + " (< (the-as uint gp-0) (the-as uint v1-48))\n" + " )\n" " (if s4-0 (set! v1-50 (format s5-0 L314 gp-0 s4-0 s3-0)))\n" " (quote #f)\n" " )\n" diff --git a/test/goalc/test_with_game.cpp b/test/goalc/test_with_game.cpp index 87db0f0b1f..50edfbadf1 100644 --- a/test/goalc/test_with_game.cpp +++ b/test/goalc/test_with_game.cpp @@ -543,7 +543,7 @@ TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_ABS_DEST) { testCase.operation = [](float x, float y) { // Avoid compiler warnings for unused variable, making a varient that accepts a lambda with only // 1 float is just unnecessary complexity - y = 0; + (void)y; return fabs(x); }; From bc0cf835fa0f82966615785c44b9b28a946da82e Mon Sep 17 00:00:00 2001 From: water Date: Fri, 5 Feb 2021 17:54:45 -0500 Subject: [PATCH 09/10] small bug fix in rewriter --- decompiler/IR2/Form.h | 3 +++ decompiler/IR2/FormExpressionAnalysis.cpp | 17 +++++++++++++++++ decompiler/IR2/FormStack.cpp | 6 ++++-- .../decompiler/test_FormExpressionBuildLong.cpp | 2 +- 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/decompiler/IR2/Form.h b/decompiler/IR2/Form.h index 067ddb029f..ce73316686 100644 --- a/decompiler/IR2/Form.h +++ b/decompiler/IR2/Form.h @@ -417,6 +417,7 @@ class CondWithElseElement : public FormElement { }; std::vector entries; Form* else_ir = nullptr; + bool already_rewritten = false; CondWithElseElement(std::vector _entries, Form* _else_ir) : entries(std::move(_entries)), else_ir(_else_ir) {} goos::Object to_form(const Env& env) const override; @@ -503,6 +504,7 @@ class ShortCircuitElement : public FormElement { Variable final_result; std::vector entries; std::optional used_as_value = std::nullopt; + bool already_rewritten = false; explicit ShortCircuitElement(std::vector _entries) : entries(std::move(_entries)) {} goos::Object to_form(const Env& env) const override; @@ -534,6 +536,7 @@ class CondNoElseElement : public FormElement { }; Variable final_destination; bool used_as_value = false; + bool already_rewritten = false; std::vector entries; explicit CondNoElseElement(std::vector _entries) : entries(std::move(_entries)) {} goos::Object to_form(const Env& env) const override; diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index 2b7f131f17..a2b4c57119 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -865,6 +865,9 @@ void WhileElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stac // CondNoElseElement /////////////////// void CondNoElseElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { + if (already_rewritten) { + stack.push_form_element(this, true); + } for (auto& entry : entries) { for (auto form : {entry.condition, entry.body}) { FormStack temp_stack; @@ -891,9 +894,13 @@ void CondNoElseElement::push_to_stack(const Env& env, FormPool& pool, FormStack& } else { stack.push_form_element(this, true); } + already_rewritten = true; } void CondWithElseElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { + if (already_rewritten) { + stack.push_form_element(this, true); + } // first, let's try to detect if all bodies write the same value std::optional last_var; bool rewrite_as_set = true; @@ -990,6 +997,7 @@ void CondWithElseElement::push_to_stack(const Env& env, FormPool& pool, FormStac } else { stack.push_form_element(this, true); } + already_rewritten = true; } /////////////////// @@ -1003,6 +1011,9 @@ void ShortCircuitElement::push_to_stack(const Env& env, FormPool& pool, FormStac stack.push_form_element(this, true); } else { + if (already_rewritten) { + stack.push_form_element(this, true); + } for (int i = 0; i < int(entries.size()); i++) { auto& entry = entries.at(i); FormStack temp_stack; @@ -1024,6 +1035,7 @@ void ShortCircuitElement::push_to_stack(const Env& env, FormPool& pool, FormStac } assert(used_as_value.has_value()); stack.push_value_to_reg(final_result, pool.alloc_single_form(nullptr, this), true); + already_rewritten = true; } } @@ -1033,6 +1045,10 @@ void ShortCircuitElement::update_from_stack(const Env& env, std::vector* result, bool) { (void)stack; + if (already_rewritten) { + result->push_back(this); + return; + } for (int i = 0; i < int(entries.size()); i++) { auto& entry = entries.at(i); FormStack temp_stack; @@ -1053,6 +1069,7 @@ void ShortCircuitElement::update_from_stack(const Env& env, } } result->push_back(this); + already_rewritten = true; } /////////////////// diff --git a/decompiler/IR2/FormStack.cpp b/decompiler/IR2/FormStack.cpp index 449c45ed0a..66dcde2a84 100644 --- a/decompiler/IR2/FormStack.cpp +++ b/decompiler/IR2/FormStack.cpp @@ -26,6 +26,7 @@ std::string FormStack::print(const Env& env) { } void FormStack::push_value_to_reg(Variable var, Form* value, bool sequence_point) { + assert(value); StackEntry entry; entry.active = true; // by default, we should display everything! entry.sequence_point = sequence_point; @@ -37,6 +38,7 @@ void FormStack::push_value_to_reg(Variable var, Form* value, bool sequence_point void FormStack::push_non_seq_reg_to_reg(const Variable& dst, const Variable& src, Form* src_as_form) { + assert(src_as_form); StackEntry entry; entry.active = true; entry.sequence_point = false; @@ -89,7 +91,7 @@ Form* FormStack::pop_reg(Register reg, for (size_t i = m_stack.size(); i-- > 0;) { auto& entry = m_stack.at(i); if (entry.active) { - if (entry.destination->reg() == reg) { + if (entry.destination.has_value() && entry.destination->reg() == reg) { entry.source->get_modified_regs(modified); if (!allow_side_effects && entry.source->has_side_effects()) { // the source of the set! has a side effect and that's not allowed, so abort. @@ -159,7 +161,7 @@ std::vector FormStack::rewrite(FormPool& pool) { std::vector FormStack::rewrite_to_get_var(FormPool& pool, const Variable& var, - const Env&) { + const Env& env) { // first, rewrite as normal. auto default_result = rewrite(pool); diff --git a/test/decompiler/test_FormExpressionBuildLong.cpp b/test/decompiler/test_FormExpressionBuildLong.cpp index 4a7b237a88..6536e2ad5c 100644 --- a/test/decompiler/test_FormExpressionBuildLong.cpp +++ b/test/decompiler/test_FormExpressionBuildLong.cpp @@ -1934,7 +1934,7 @@ TEST_F(FormRegressionTest, ExprValid) { " (set! s5-0 t0-0)\n" " (and\n" " (>= (the-as uint gp-0) (the-as uint __START-OF-TABLE__))\n" - " (begin (< (the-as uint gp-0) (the-as uint 134217728)) v1-1)\n" + " (< (the-as uint gp-0) (the-as uint 134217728))\n" " )\n" " )\n" " )\n" From fbde0dd112509d6c0d5adff94e4a346580ed595f Mon Sep 17 00:00:00 2001 From: water Date: Fri, 5 Feb 2021 19:32:55 -0500 Subject: [PATCH 10/10] fix incredibly stupid printing bug --- decompiler/IR2/Form.cpp | 2 +- decompiler/IR2/FormExpressionAnalysis.cpp | 2 +- decompiler/IR2/FormStack.cpp | 2 +- test/decompiler/test_FormExpressionBuild.cpp | 121 +++++++++++++++++++ 4 files changed, 124 insertions(+), 3 deletions(-) diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index 3420ccb21d..da3ba4afce 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -769,7 +769,7 @@ goos::Object CondNoElseElement::to_form(const Env& env) const { for (auto& e : entries) { std::vector entry; entry.push_back(e.condition->to_form_as_condition(env)); - entries.front().body->inline_forms(list, env); + e.body->inline_forms(entry, env); list.push_back(pretty_print::build_list(entry)); } return pretty_print::build_list(list); diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index a2b4c57119..07cff720c5 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -1097,7 +1097,7 @@ std::vector make_cast(const std::vector& in, } } // namespace -FormElement* ConditionElement::make_generic(const Env& env, +FormElement* ConditionElement::make_generic(const Env&, FormPool& pool, const std::vector& source_forms, const std::vector& types) { diff --git a/decompiler/IR2/FormStack.cpp b/decompiler/IR2/FormStack.cpp index 66dcde2a84..2ae8349feb 100644 --- a/decompiler/IR2/FormStack.cpp +++ b/decompiler/IR2/FormStack.cpp @@ -161,7 +161,7 @@ std::vector FormStack::rewrite(FormPool& pool) { std::vector FormStack::rewrite_to_get_var(FormPool& pool, const Variable& var, - const Env& env) { + const Env&) { // first, rewrite as normal. auto default_result = rewrite(pool); diff --git a/test/decompiler/test_FormExpressionBuild.cpp b/test/decompiler/test_FormExpressionBuild.cpp index 7e62814ee6..72efbc2ca0 100644 --- a/test/decompiler/test_FormExpressionBuild.cpp +++ b/test/decompiler/test_FormExpressionBuild.cpp @@ -2225,4 +2225,125 @@ TEST_F(FormRegressionTest, ExprPrintTreeBitmask) { " (quote #f)\n" " )"; test_with_expr(func, type, expected, false, "", {{"L323", " "}, {"L322", "| "}}); +} + +TEST_F(FormRegressionTest, ExprPrintName) { + std::string func = + " sll r0, r0, 0\n" + "L136:\n" + " daddiu sp, sp, -16\n" + " sd ra, 0(sp)\n" + + " bne a0, a1, L137\n" + " or v1, s7, r0\n" + + " daddiu v0, s7, #t\n" + " beq r0, r0, L143\n" + " sll r0, r0, 0\n" + + "L137:\n" + " lwu v1, -4(a0)\n" + " lw a2, string(s7)\n" + " dsubu v1, v1, a2\n" + " daddiu a2, s7, 8\n" + " movn a2, s7, v1\n" + " beql s7, a2, L138\n" + " or v1, a2, r0\n" + + " lwu v1, -4(a1)\n" + " lw a2, string(s7)\n" + " dsubu a2, v1, a2\n" + " daddiu v1, s7, 8\n" + " movn v1, s7, a2\n" + + "L138:\n" + " beq s7, v1, L139\n" + " or v1, s7, r0\n" + + " lw t9, string=(s7)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " beq r0, r0, L143\n" + " sll r0, r0, 0\n" + + "L139:\n" + " lwu v1, -4(a0)\n" + " lw a2, string(s7)\n" + " dsubu v1, v1, a2\n" + " daddiu a2, s7, 8\n" + " movn a2, s7, v1\n" + " beql s7, a2, L140\n" + " or v1, a2, r0\n" + + " lwu v1, -4(a1)\n" + " lw a2, symbol(s7)\n" + " dsubu a2, v1, a2\n" + " daddiu v1, s7, 8\n" + " movn v1, s7, a2\n" + + "L140:\n" + " beq s7, v1, L141\n" + " or v1, s7, r0\n" + + " lw t9, string=(s7)\n" + " ori v1, r0, 65336\n" + " daddu v1, v1, a1\n" + " lwu a1, 0(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + "\n" + " beq r0, r0, L143\n" + " sll r0, r0, 0\n" + + "L141:\n" + " lwu v1, -4(a1)\n" + " lw a2, string(s7)\n" + " dsubu v1, v1, a2\n" + " daddiu a2, s7, 8\n" + " movn a2, s7, v1\n" + " beql s7, a2, L142\n" + " or v1, a2, r0\n" + + " lwu v1, -4(a0)\n" + " lw a2, symbol(s7)\n" + " dsubu a2, v1, a2\n" + " daddiu v1, s7, 8\n" + " movn v1, s7, a2\n" + + "L142:\n" + " beq s7, v1, L143\n" + " or v0, s7, r0\n" + + " lw t9, string=(s7)\n" + " or v1, a1, r0\n" + " ori a1, r0, 65336\n" + " daddu a0, a1, a0\n" + " lwu a1, 0(a0)\n" + " or a0, v1, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + "L143:\n" + " ld ra, 0(sp)\n" + " jr ra\n" + " daddiu sp, sp, 16"; + std::string type = "(function basic basic symbol)"; + + std::string expected = + "(cond\n" + " ((= a0-0 a1-0) (quote #t))\n" + " ((and (= (-> a0-0 type) string) (= (-> a1-0 type) string))\n" + " (string= a0-0 a1-0)\n" + " )\n" + " ((and (= (-> a0-0 type) string) (= (-> a1-0 type) symbol))\n" + " (string= a0-0 (-> (+ 65336 (the-as int (the-as symbol a1-0))) 0))\n" + " )\n" + " ((and (= (-> a1-0 type) string) (= (-> a0-0 type) symbol))\n" + " (string= a1-0 (-> (+ 65336 (the-as int (the-as symbol a0-0))) 0))\n" + " )\n" + " )"; + test_with_expr(func, type, expected, false, "", {}, + parse_hint_json("[\t\t[24, [\"a1\", \"symbol\"]],\n" + "\t\t[39, [\"a0\", \"symbol\"]]]")); } \ No newline at end of file