diff --git a/decompiler/IR2/OpenGoalMapping.cpp b/decompiler/IR2/OpenGoalMapping.cpp index 98c334afac..324bf52069 100644 --- a/decompiler/IR2/OpenGoalMapping.cpp +++ b/decompiler/IR2/OpenGoalMapping.cpp @@ -12,11 +12,28 @@ const std::map MIPS_ASM_TO_OPEN_GOAL_FUN {InstructionKind::PSRAW, {".pw.sra", {}}}, {InstructionKind::PSUBW, {".psubw", {}}}, + // Boolean Arithmetic - or / not or / and {InstructionKind::POR, {".por", {}}}, {InstructionKind::PNOR, {".pnor", {}}}, {InstructionKind::PAND, {".pand", {}}}, + // Parallel Pack + {InstructionKind::PPACH, {".ppach", {}}}, + + // Parallel Compares + {InstructionKind::PCEQB, {".pceqb", {}}}, + // {InstructionKind::PCEQH, {".pceqh", {}}}, + {InstructionKind::PCEQW, {".pceqw", {}}}, + // {InstructionKind::PCGTB, {".pcgtb", {}}}, + // {InstructionKind::PCGTH, {".pcgth", {}}}, + {InstructionKind::PCGTW, {".pcgtw", {}}}, + + // Parallel Extends + {InstructionKind::PEXTUB, {".pextub", {}}}, + {InstructionKind::PEXTUH, {".pextuh", {}}}, {InstructionKind::PEXTUW, {".pextuw", {}}}, + {InstructionKind::PEXTLB, {".pextlb", {}}}, + {InstructionKind::PEXTLH, {".pextlh", {}}}, {InstructionKind::PEXTLW, {".pextlw", {}}}, {InstructionKind::PCPYLD, {".pcpyld", {}}}, {InstructionKind::PCPYUD, {".pcpyud", {}}}, @@ -113,13 +130,11 @@ const std::map MIPS_ASM_TO_OPEN_GOAL_FUN //// Fixed point conversions {InstructionKind::VFTOI0, {".ftoi.vf", {MOD::DEST_MASK}}}, {InstructionKind::VITOF0, {".itof.vf", {MOD::DEST_MASK}}}, - - {InstructionKind::VFTOI4, {"TODO.VFTOI4", {}}}, - - {InstructionKind::VITOF12, {"TODO.VITOF12", {}}}, - {InstructionKind::VFTOI12, {"TODO.VFTOI12", {}}}, - - {InstructionKind::VITOF15, {"TODO.VITOF15", {}}}, + // NOTE - Only the .xyzw mask is supported via macros! 
+ {InstructionKind::VFTOI4, {"vftoi4.xyzw", {MOD::DEST_MASK}}}, + {InstructionKind::VITOF12, {"vitof12.xyzw", {MOD::DEST_MASK}}}, + {InstructionKind::VFTOI12, {"vftoi12.xyzw", {MOD::DEST_MASK}}}, + {InstructionKind::VITOF15, {"vitof15.xyzw", {MOD::DEST_MASK}}}, //// Status Checks {InstructionKind::VCLIP, {"TODO.VCLIP", {}}}, diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index cb8e8f12b9..91230bf532 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -961,4 +961,4 @@ const DecompilerLabel& LinkedObjectFile::get_label_by_name(const std::string& na } throw std::runtime_error("Cannot find label " + name); } -} // namespace decompiler \ No newline at end of file +} // namespace decompiler diff --git a/decompiler/config/all-types.gc b/decompiler/config/all-types.gc index 4b6c78cac4..b11eb6e6e4 100644 --- a/decompiler/config/all-types.gc +++ b/decompiler/config/all-types.gc @@ -15793,7 +15793,7 @@ (define-extern collide-cache-using-line-sphere-test function) (define-extern collide-cache-using-y-probe-test function) -(define-extern collide-cache-using-box-test function) +(define-extern collide-cache-using-box-test (function vector symbol)) (define-extern draw-node-cull function) @@ -19481,9 +19481,9 @@ ;; - Functions -(define-extern drawable-sphere-box-intersect? function) -(define-extern instance-sphere-box-intersect? function) -(define-extern instance-tfragment-add-debug-sphere function) +(define-extern drawable-sphere-box-intersect? (function drawable bounding-box4w symbol)) ;; TODO - pcgtw | por | ppach +(define-extern instance-sphere-box-intersect? 
(function drawable instance-tie bounding-box4w symbol)) ;; pextlh | VITOF12 | pcgtw | por | ppach ;; TODO - the first arg is based from the second arg in `drawable:11` +(define-extern instance-tfragment-add-debug-sphere (function drawable instance-tie symbol)) ;; unused ;; ---------------------- diff --git a/decompiler/config/jak1_ntsc_black_label/label_types.jsonc b/decompiler/config/jak1_ntsc_black_label/label_types.jsonc index 992ae7b743..f0ed9657ae 100644 --- a/decompiler/config/jak1_ntsc_black_label/label_types.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/label_types.jsonc @@ -1286,6 +1286,10 @@ ["L652", "float", true] ], + "main-collide": [ + ["L5", "rgba", true] + ], + // please do not add things after this entry! git is dumb. "object-file-that-doesnt-actually-exist-and-i-just-put-this-here-to-prevent-merge-conflicts-with-this-file": [] } diff --git a/decompiler/config/jak1_ntsc_black_label/stack_structures.jsonc b/decompiler/config/jak1_ntsc_black_label/stack_structures.jsonc index f08294c1b5..5148c77dbe 100644 --- a/decompiler/config/jak1_ntsc_black_label/stack_structures.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/stack_structures.jsonc @@ -577,6 +577,12 @@ "anim-tester-add-newobj": [[16, "event-message-block"]], "anim-tester-start": [[16, "event-message-block"]], "anim-tester-add-sequence": [[16, "event-message-block"]], + + "(anon-function 28 task-control)": [[16, "event-message-block"]], + + "instance-tfragment-add-debug-sphere": [ + [16, "vector"] + ], "placeholder-do-not-add-below!": [] } diff --git a/goal_src/engine/collide/main-collide.gc b/goal_src/engine/collide/main-collide.gc index 57fb2e53f6..542bcb808b 100644 --- a/goal_src/engine/collide/main-collide.gc +++ b/goal_src/engine/collide/main-collide.gc @@ -5,3 +5,227 @@ ;; name in dgo: main-collide ;; dgos: GAME, ENGINE +;; definition for function drawable-sphere-box-intersect? 
+;; WARN: Function may read a register that is not set: f31 +;; Used lq/sq +(defun drawable-sphere-box-intersect? ((arg0 drawable) (arg1 bounding-box4w)) + (local-vars + (r0-0 int) + (r0-1 int) + (r0-2 uint128) + (r0-3 int) + (v1-1 uint128) + (v1-2 uint128) + (v1-3 uint128) + (a0-1 uint128) + (a1-2 uint128) + (a2-0 uint128) + (f31-0 none) + ) + (rlet ((vf1 :class vf) + (vf2 :class vf) + (vf3 :class vf) + (vf4 :class vf) + ) + (nop!) + (nop!) + (.lvf vf1 (&-> arg0 bsphere quad)) + (.add.w.vf vf2 vf1 vf1 :mask #b111) + (let ((v1-0 (-> arg1 min quad))) + (.sub.w.vf vf1 vf1 vf1 :mask #b111) + (let ((a1-1 (-> arg1 max quad))) + (.ftoi.vf vf4 vf2) + (nop!) + (.ftoi.vf vf3 vf1) + (nop!) + (.mov a0-1 vf4) + (nop!) + (.mov a2-0 vf3) + (nop!) + (.pcgtw a1-2 a2-0 a1-1) + ) + (.mov r0-0 f31-0) + (.pcgtw v1-1 v1-0 a0-1) + ) + (.mov r0-1 f31-0) + (.por v1-2 a1-2 v1-1) + (.mov r0-2 f31-0) + (.ppach v1-3 r0-2 v1-2) + (.mov r0-3 f31-0) + (let ((v1-4 (shl (the-as int v1-3) 16))) + (nop!) + (zero? v1-4) + ) + ) + ) + +;; definition for function instance-sphere-box-intersect? +;; WARN: Function may read a register that is not set: f31 +;; Used lq/sq +(defun + instance-sphere-box-intersect? + ((arg0 drawable) (arg1 instance-tie) (arg2 bounding-box4w)) + (local-vars + (r0-0 uint128) + (r0-1 int) + (r0-2 uint128) + (r0-3 int) + (r0-4 int) + (r0-5 uint128) + (r0-6 int) + (v1-3 uint128) + (v1-4 uint128) + (v1-5 uint128) + (a0-2 uint128) + (a1-2 uint128) + (a2-1 uint128) + (a3-1 uint128) + (a3-3 uint128) + (a3-4 uint128) + (t0-1 uint128) + (t0-2 uint128) + (t1-0 uint128) + (t2-1 uint128) + (t2-2 uint128) + (f31-0 none) + ) + (rlet ((acc :class vf) + (vf0 :class vf) + (vf1 :class vf) + (vf10 :class vf) + (vf2 :class vf) + (vf3 :class vf) + (vf4 :class vf) + (vf5 :class vf) + (vf6 :class vf) + (vf7 :class vf) + (vf8 :class vf) + (vf9 :class vf) + ) + (init-vf0-vector) + (nop!) + (let ((v1-0 (-> arg1 max-scale))) + (nop!) 
+ (let ((a3-0 (the-as uint128 (-> arg1 origin vector4h 3 long)))) + (nop!) + (let ((t2-0 (the-as uint128 (-> arg1 origin vector4h 0 long)))) + (.pextlh a3-1 a3-0 r0-0) + (let ((t0-0 (the-as uint128 (-> arg1 origin vector4h 1 long)))) + (.pw.sra t1-0 a3-1 10) + (let ((a3-2 (the-as uint128 (-> arg1 origin vector4h 2 long)))) + (.pextlh t2-1 t2-0 r0-0) + (.mov r0-1 f31-0) + (.pw.sra t2-2 t2-1 16) + (.mov r0-2 f31-0) + (.pextlh t0-1 t0-0 r0-2) + (.mov vf8 t1-0) + (.pw.sra t0-2 t0-1 16) + (.mov vf5 t2-2) + (.pextlh a3-3 a3-2 r0-2) + ) + ) + ) + ) + (.mov vf6 t0-2) + (.pw.sra a3-4 a3-3 16) + (.lvf vf9 (&-> arg1 bsphere quad)) + (nop!) + (.mov vf7 a3-4) + (nop!) + (.mov vf10 v1-0) + ) + (.itof.vf vf8 vf8) + (nop!) + (vitof12.xyzw vf5 vf5) + (nop!) + (vitof12.xyzw vf6 vf6) + (nop!) + (vitof12.xyzw vf7 vf7) + (nop!) + (.add.vf vf8 vf8 vf9 :mask #b111) + (nop!) + (nop!) + (.lvf vf9 (&-> arg0 bsphere quad)) + (vitof12.xyzw vf10 vf10) + (nop!) + (.mul.w.vf vf10 vf10 vf9 :mask #b1) + (nop!) + (.mul.x.vf acc vf5 vf9) + (nop!) + (.add.mul.y.vf acc vf6 vf9 acc) + (let ((v1-2 (-> arg2 min quad))) + (.add.mul.z.vf acc vf7 vf9 acc) + (let ((a1-1 (-> arg2 max quad))) + (.add.mul.w.vf vf1 vf8 vf0 acc) + (nop!) + (.add.x.vf vf2 vf1 vf10 :mask #b111) + (nop!) + (.sub.x.vf vf1 vf1 vf10 :mask #b111) + (nop!) + (.ftoi.vf vf4 vf2) + (nop!) + (.ftoi.vf vf3 vf1) + (nop!) + (.mov a0-2 vf4) + (nop!) + (.mov a2-1 vf3) + (nop!) + (.pcgtw a1-2 a2-1 a1-1) + ) + (.mov r0-3 f31-0) + (.pcgtw v1-3 v1-2 a0-2) + ) + (.mov r0-4 f31-0) + (.por v1-4 a1-2 v1-3) + (.mov r0-5 f31-0) + (.ppach v1-5 r0-5 v1-4) + (.mov r0-6 f31-0) + (let ((v1-6 (shl (the-as int v1-5) 16))) + (nop!) + (zero? 
v1-6) + ) + ) + ) + +;; definition for function instance-tfragment-add-debug-sphere +;; Used lq/sq +(defun instance-tfragment-add-debug-sphere ((arg0 drawable) (arg1 instance-tie)) + (local-vars (r0-0 uint128) (v1-1 uint128) (v1-2 uint128) (a3-0 float)) + (rlet ((vf0 :class vf) + (vf10 :class vf) + (vf11 :class vf) + (vf12 :class vf) + (vf9 :class vf) + ) + (init-vf0-vector) + (nop!) + (let ((v1-0 (the-as uint128 (-> arg1 origin vector4h 3 long)))) + (.pextlh v1-1 v1-0 r0-0) + ) + (.lvf vf9 (&-> arg0 bsphere quad)) + (.pw.sra v1-2 v1-1 10) + (.lvf vf10 (&-> arg1 bsphere quad)) + (nop!) + (.mov vf12 v1-2) + (.itof.vf vf12 vf12) + (nop!) + (.add.vf vf10 vf10 vf12 :mask #b111) + (nop!) + (.add.vf vf9 vf9 vf10 :mask #b111) + (nop!) + (.add.w.vf vf11 vf0 vf9 :mask #b1) + (nop!) + (.mov a3-0 vf11) + (nop!) + (let ((a2-0 (new-stack-vector0))) + (.svf (&-> a2-0 quad) vf9) + (add-debug-sphere + #t + (bucket-id debug-draw0) + a2-0 + a3-0 + (new 'static 'rgba :r #x80 :g #x80 :b #x80 :a #x80) + ) + ) + ) + ) diff --git a/goal_src/engine/ps2/vu1-macros.gc b/goal_src/engine/ps2/vu1-macros.gc index c36dcf8db7..1bff6d86c2 100644 --- a/goal_src/engine/ps2/vu1-macros.gc +++ b/goal_src/engine/ps2/vu1-macros.gc @@ -65,6 +65,85 @@ ) ) +(defmacro vftoi12.xyzw (dst src) + "convert to 20.12 integer. This does the multiply while the number is still + a float. This will have issues for very large floats, but it seems like this + is how PCSX2 does it as well, so maybe it's right? + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 4096.0) + (.mul.x.vf temp ,src temp) + (.ftoi.vf ,dst temp) + ) + ) + ) + +(defmacro vftoi15.xyzw (dst src) + "convert to 17.15 integer. This does the multiply while the number is still + a float. This will have issues for very large floats, but it seems like this + is how PCSX2 does it as well, so maybe it's right? 
+ NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 32768.0) + (.mul.x.vf temp ,src temp) + (.ftoi.vf ,dst temp) + ) + ) + ) + +(defmacro vitof4.xyzw (dst src) + "convert from a 28.4 integer. This does the multiply once the number is + a float. This will have issues for very large floats, but it seems like this + is how PCSX2 does it as well, so maybe it's right? + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 0.0625) + (.itof.vf ,dst ,src) ;; integer lanes -> float first + (.mul.x.vf ,dst ,dst temp) ;; then scale by 2^-4 + ) + ) + ) + +(defmacro vitof12.xyzw (dst src) + "convert from a 20.12 integer. This does the multiply once the number is + a float. This will have issues for very large floats, but it seems like this + is how PCSX2 does it as well, so maybe it's right? + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 0.000244140625) + (.itof.vf ,dst ,src) ;; integer lanes -> float first + (.mul.x.vf ,dst ,dst temp) ;; then scale by 2^-12 + ) + ) + ) + +(defmacro vitof15.xyzw (dst src) + "convert from a 17.15 integer. This does the multiply once the number is + a float. This will have issues for very large floats, but it seems like this + is how PCSX2 does it as well, so maybe it's right? + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 0.000030517578125) + (.itof.vf ,dst ,src) ;; integer lanes -> float first + (.mul.x.vf ,dst ,dst temp) ;; then scale by 2^-15 + ) + ) + ) ;; In the original game, init-for-transform stashed a bunch of stuff in registers, to be used by other functions. ;; In OpenGOAL, this seems risky so we're going to back up this manually. 
diff --git a/goalc/compiler/Compiler.h b/goalc/compiler/Compiler.h index 80f64e15ca..2e126358ba 100644 --- a/goalc/compiler/Compiler.h +++ b/goalc/compiler/Compiler.h @@ -457,17 +457,32 @@ class Compiler { Val* compile_asm_pw_sll(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_pw_srl(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_pw_sra(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_pextlw(const goos::Object& form, const goos::Object& rest, Env* env); + + Val* compile_asm_por(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pnor(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pand(const goos::Object& form, const goos::Object& rest, Env* env); + + Val* compile_asm_pceqb(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pceqh(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pceqw(const goos::Object& form, const goos::Object& rest, Env* env); + + Val* compile_asm_pcgtb(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pcgth(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pcgtw(const goos::Object& form, const goos::Object& rest, Env* env); + + Val* compile_asm_pextub(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pextuh(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_pextuw(const goos::Object& form, const goos::Object& rest, Env* env); + + Val* compile_asm_pextlb(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pextlh(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pextlw(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pcpyud(const goos::Object& form, const goos::Object& rest, Env* env); Val* 
compile_asm_pcpyld(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_pceqw(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_ppach(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_psubw(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_xorp(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_por(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_pnor(const goos::Object& form, const goos::Object& rest, Env* env); - Val* compile_asm_pand(const goos::Object& form, const goos::Object& rest, Env* env); // Atoms diff --git a/goalc/compiler/IR.cpp b/goalc/compiler/IR.cpp index b5ec399bf0..818ad96e6f 100644 --- a/goalc/compiler/IR.cpp +++ b/goalc/compiler/IR.cpp @@ -1518,9 +1518,21 @@ IR_Int128Math3Asm::IR_Int128Math3Asm(bool use_color, std::string IR_Int128Math3Asm::print() { std::string function = ""; switch (m_kind) { + case Kind::PEXTLB: + function = ".pextlb"; + break; + case Kind::PEXTLH: + function = ".pextlh"; + break; case Kind::PEXTLW: function = ".pextlw"; break; + case Kind::PEXTUB: + function = ".pextub"; + break; + case Kind::PEXTUH: + function = ".pextuh"; + break; case Kind::PEXTUW: function = ".pextuw"; break; @@ -1530,11 +1542,26 @@ std::string IR_Int128Math3Asm::print() { case Kind::PCPYUD: function = ".pcpyud"; break; + case Kind::PSUBW: + function = ".psubw"; + break; + case Kind::PCEQB: + function = ".pceqb"; + break; + case Kind::PCEQH: + function = ".pceqh"; + break; case Kind::PCEQW: function = ".pceqw"; break; - case Kind::PSUBW: - function = ".psubw"; + case Kind::PCGTB: + function = ".pcgtb"; + break; + case Kind::PCGTH: + function = ".pcgth"; + break; + case Kind::PCGTW: + function = ".pcgtw"; break; case Kind::POR: function = ".por"; @@ -1570,14 +1597,30 @@ void IR_Int128Math3Asm::do_codegen(emitter::ObjectGenerator* gen, auto src2 = get_reg_asm(m_src2, allocs, 
irec, m_use_coloring); switch (m_kind) { - case Kind::PEXTLW: + case Kind::PEXTUB: // NOTE: this is intentionally swapped because x86 and PS2 do this opposite ways. - gen->add_instr(IGen::pextlw_swapped(dst, src2, src1), irec); + gen->add_instr(IGen::pextub_swapped(dst, src2, src1), irec); + break; + case Kind::PEXTUH: + // NOTE: this is intentionally swapped because x86 and PS2 do this opposite ways. + gen->add_instr(IGen::pextuh_swapped(dst, src2, src1), irec); break; case Kind::PEXTUW: // NOTE: this is intentionally swapped because x86 and PS2 do this opposite ways. gen->add_instr(IGen::pextuw_swapped(dst, src2, src1), irec); break; + case Kind::PEXTLB: + // NOTE: this is intentionally swapped because x86 and PS2 do this opposite ways. + gen->add_instr(IGen::pextlb_swapped(dst, src2, src1), irec); + break; + case Kind::PEXTLH: + // NOTE: this is intentionally swapped because x86 and PS2 do this opposite ways. + gen->add_instr(IGen::pextlh_swapped(dst, src2, src1), irec); + break; + case Kind::PEXTLW: + // NOTE: this is intentionally swapped because x86 and PS2 do this opposite ways. + gen->add_instr(IGen::pextlw_swapped(dst, src2, src1), irec); + break; case Kind::PCPYLD: // NOTE: this is intentionally swapped because x86 and PS2 do this opposite ways. 
gen->add_instr(IGen::pcpyld_swapped(dst, src2, src1), irec); @@ -1585,8 +1628,24 @@ void IR_Int128Math3Asm::do_codegen(emitter::ObjectGenerator* gen, case Kind::PCPYUD: gen->add_instr(IGen::pcpyud(dst, src1, src2), irec); break; + case Kind::PCEQB: + gen->add_instr(IGen::parallel_compare_e_b(dst, src2, src1), irec); + break; + case Kind::PCEQH: + gen->add_instr(IGen::parallel_compare_e_h(dst, src2, src1), irec); + break; case Kind::PCEQW: - gen->add_instr(IGen::pceqw(dst, src1, src2), irec); + gen->add_instr(IGen::parallel_compare_e_w(dst, src2, src1), irec); + break; + // NOTE: greater-than is not commutative; keep MIPS operand order so dst = (src1 > src2). VPCMPGT* computes (vex operand > r/m operand). + case Kind::PCGTB: + gen->add_instr(IGen::parallel_compare_gt_b(dst, src1, src2), irec); + break; + case Kind::PCGTH: + gen->add_instr(IGen::parallel_compare_gt_h(dst, src1, src2), irec); + break; + case Kind::PCGTW: + gen->add_instr(IGen::parallel_compare_gt_w(dst, src1, src2), irec); break; case Kind::PSUBW: // psubW on mips is psubD on x86... diff --git a/goalc/compiler/IR.h b/goalc/compiler/IR.h index de627d5f11..5f34b4b684 100644 --- a/goalc/compiler/IR.h +++ b/goalc/compiler/IR.h @@ -562,7 +562,26 @@ class IR_VFMath3Asm : public IR_Asm { class IR_Int128Math3Asm : public IR_Asm { public: // these are MIPS names, not x86 names. 
- enum class Kind { PEXTLW, PEXTUW, PCPYUD, PCPYLD, PCEQW, PSUBW, POR, PXOR, PAND }; + enum class Kind { + PEXTUB, + PEXTUH, + PEXTUW, + PEXTLB, + PEXTLH, + PEXTLW, + PCPYUD, + PCPYLD, + PSUBW, + PCEQB, + PCEQH, + PCEQW, + PCGTB, + PCGTH, + PCGTW, + POR, + PXOR, + PAND + }; IR_Int128Math3Asm(bool use_color, const RegVal* dst, const RegVal* src1, diff --git a/goalc/compiler/compilation/Asm.cpp b/goalc/compiler/compilation/Asm.cpp index 6f76fc66f8..10e2b117de 100644 --- a/goalc/compiler/compilation/Asm.cpp +++ b/goalc/compiler/compilation/Asm.cpp @@ -686,14 +686,54 @@ Val* Compiler::compile_asm_pand(const goos::Object& form, const goos::Object& re return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PAND, env); } -Val* Compiler::compile_asm_pextlw(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PEXTLW, env); +Val* Compiler::compile_asm_pceqb(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCEQB, env); +} + +Val* Compiler::compile_asm_pceqh(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCEQH, env); +} + +Val* Compiler::compile_asm_pceqw(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCEQW, env); +} + +Val* Compiler::compile_asm_pcgtb(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCGTB, env); +} + +Val* Compiler::compile_asm_pcgth(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCGTH, env); +} + +Val* Compiler::compile_asm_pcgtw(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, 
IR_Int128Math3Asm::Kind::PCGTW, env); +} + +Val* Compiler::compile_asm_pextub(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PEXTUB, env); +} + +Val* Compiler::compile_asm_pextuh(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PEXTUH, env); } Val* Compiler::compile_asm_pextuw(const goos::Object& form, const goos::Object& rest, Env* env) { return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PEXTUW, env); } +Val* Compiler::compile_asm_pextlb(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PEXTLB, env); +} + +Val* Compiler::compile_asm_pextlh(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PEXTLH, env); +} + +Val* Compiler::compile_asm_pextlw(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PEXTLW, env); +} + Val* Compiler::compile_asm_pcpyud(const goos::Object& form, const goos::Object& rest, Env* env) { return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCPYUD, env); } @@ -702,10 +742,6 @@ Val* Compiler::compile_asm_pcpyld(const goos::Object& form, const goos::Object& return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCPYLD, env); } -Val* Compiler::compile_asm_pceqw(const goos::Object& form, const goos::Object& rest, Env* env) { - return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCEQW, env); -} - Val* Compiler::compile_asm_psubw(const goos::Object& form, const goos::Object& rest, Env* env) { return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PSUBW, env); } diff --git a/goalc/compiler/compilation/Atoms.cpp b/goalc/compiler/compilation/Atoms.cpp index 
7c2d9498f5..a27f5514a2 100644 --- a/goalc/compiler/compilation/Atoms.cpp +++ b/goalc/compiler/compilation/Atoms.cpp @@ -96,8 +96,27 @@ const std::unordered_map< {".pw.sll", &Compiler::compile_asm_pw_sll}, {".pw.srl", &Compiler::compile_asm_pw_srl}, {".pw.sra", &Compiler::compile_asm_pw_sra}, + + {".por", &Compiler::compile_asm_por}, + {".pnor", &Compiler::compile_asm_pnor}, + {".pand", &Compiler::compile_asm_pand}, + + {".pceqb", &Compiler::compile_asm_pceqb}, + {".pceqh", &Compiler::compile_asm_pceqh}, + {".pceqw", &Compiler::compile_asm_pceqw}, + + {".pcgtb", &Compiler::compile_asm_pcgtb}, + {".pcgth", &Compiler::compile_asm_pcgth}, + {".pcgtw", &Compiler::compile_asm_pcgtw}, + + {".pextlb", &Compiler::compile_asm_pextlb}, + {".pextlh", &Compiler::compile_asm_pextlh}, {".pextlw", &Compiler::compile_asm_pextlw}, + + {".pextub", &Compiler::compile_asm_pextub}, + {".pextuh", &Compiler::compile_asm_pextuh}, {".pextuw", &Compiler::compile_asm_pextuw}, + {".pcpyld", &Compiler::compile_asm_pcpyld}, {".pcpyud", &Compiler::compile_asm_pcpyud}, {".pceqw", &Compiler::compile_asm_pceqw}, diff --git a/goalc/emitter/IGen.h b/goalc/emitter/IGen.h index cedde7698d..595416d10f 100644 --- a/goalc/emitter/IGen.h +++ b/goalc/emitter/IGen.h @@ -2433,13 +2433,70 @@ class IGen { return instr; } - static Instruction pextlw_swapped(Register dst, Register src0, Register src1) { + static Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { assert(dst.is_xmm()); assert(src0.is_xmm()); assert(src1.is_xmm()); - // VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128 + // VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 // reg, vex, r/m - Instruction instr(0x62); + Instruction instr(0xEB); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + + static Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + 
assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0xEF); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + + static Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0xDB); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + + // Reminder - a word in MIPS = 32bits = a DWORD in x86 + // MIPS || x86 + // ----------------------- + // byte || byte + // halfword || word + // word || dword + // doubleword || quadword + + // -- Unpack High Data Instructions + static Instruction pextub_swapped(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 68/r VPUNPCKHBW xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0x68); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + + static Instruction pextuh_swapped(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 69/r VPUNPCKHWD xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0x69); instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), false, VexPrefix::P_66); return instr; @@ -2457,35 +2514,71 @@ class IGen { return instr; } - static Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { + // -- Unpack Low Data Instructions + static Instruction pextlb_swapped(Register dst, Register src0, Register src1) { 
assert(dst.is_xmm()); assert(src0.is_xmm()); assert(src1.is_xmm()); - // VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 + // VEX.128.66.0F.WIG 60/r VPUNPCKLBW xmm1,xmm2, xmm3/m128 // reg, vex, r/m - Instruction instr(0x6c); + Instruction instr(0x60); instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), false, VexPrefix::P_66); return instr; } - static Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { - return vpunpcklqdq(dst, src0, src1); + static Instruction pextlh_swapped(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 61/r VPUNPCKLWD xmm1,xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0x61); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; } - static Instruction pcpyud(Register dst, Register src0, Register src1) { + static Instruction pextlw_swapped(Register dst, Register src0, Register src1) { assert(dst.is_xmm()); assert(src0.is_xmm()); assert(src1.is_xmm()); - // VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 + // VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128 // reg, vex, r/m - Instruction instr(0x6d); + Instruction instr(0x62); instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), false, VexPrefix::P_66); return instr; } - static Instruction pceqw(Register dst, Register src0, Register src1) { + // Equal to than comparison as 16 bytes (8 bits) + static Instruction parallel_compare_e_b(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 74 /r VPCMPEQB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0x74); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + 
return instr; + } + + // Equal to than comparison as 8 halfwords (16 bits) + static Instruction parallel_compare_e_h(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 75 /r VPCMPEQW xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0x75); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + + // Equal to than comparison as 4 words (32 bits) + static Instruction parallel_compare_e_w(Register dst, Register src0, Register src1) { assert(dst.is_xmm()); assert(src0.is_xmm()); assert(src1.is_xmm()); @@ -2497,6 +2590,73 @@ class IGen { return instr; } + // Greater than comparison as 16 bytes (8 bits) + static Instruction parallel_compare_gt_b(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 64 /r VPCMPGTB xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0x64); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + + // Greater than comparison as 8 halfwords (16 bits) + static Instruction parallel_compare_gt_h(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 65 /r VPCMPGTW xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0x65); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + + // Greater than comparison as 4 words (32 bits) + static Instruction parallel_compare_gt_w(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 66 /r VPCMPGTD xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + Instruction 
instr(0x66); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + + static Instruction vpunpcklqdq(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0x6c); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + + static Instruction pcpyld_swapped(Register dst, Register src0, Register src1) { + return vpunpcklqdq(dst, src0, src1); + } + + static Instruction pcpyud(Register dst, Register src0, Register src1) { + assert(dst.is_xmm()); + assert(src0.is_xmm()); + assert(src1.is_xmm()); + // VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128 + // reg, vex, r/m + Instruction instr(0x6d); + instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), + false, VexPrefix::P_66); + return instr; + } + static Instruction vpsubd(Register dst, Register src0, Register src1) { assert(dst.is_xmm()); assert(src0.is_xmm()); @@ -2552,41 +2712,5 @@ class IGen { instr.set(Imm(1, imm)); return instr; } - - static Instruction parallel_bitwise_or(Register dst, Register src0, Register src1) { - assert(dst.is_xmm()); - assert(src0.is_xmm()); - assert(src1.is_xmm()); - // VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0xEB); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction parallel_bitwise_xor(Register dst, Register src0, Register src1) { - assert(dst.is_xmm()); - assert(src0.is_xmm()); - assert(src1.is_xmm()); - // VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0xEF); - 
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } - - static Instruction parallel_bitwise_and(Register dst, Register src0, Register src1) { - assert(dst.is_xmm()); - assert(src0.is_xmm()); - assert(src1.is_xmm()); - // VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 - // reg, vex, r/m - Instruction instr(0xDB); - instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(), - false, VexPrefix::P_66); - return instr; - } }; } // namespace emitter diff --git a/test/decompiler/reference/decompiler-macros.gc b/test/decompiler/reference/decompiler-macros.gc index fb49fdd674..c9c60c8d6a 100644 --- a/test/decompiler/reference/decompiler-macros.gc +++ b/test/decompiler/reference/decompiler-macros.gc @@ -233,4 +233,100 @@ ) `(new 'static 'sound-name :lo ,lo-val :hi ,hi-val) ) - ) \ No newline at end of file + ) + +(defmacro vftoi4.xyzw (dst src) + "convert to 28.4 integer. This does the multiply while the number is still + a float. This will have issues for very large floats, but it seems like this + is how PCSX2 does it as well, so maybe it's right? + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 16.0) + (.mul.x.vf temp ,src temp) + (.ftoi.vf ,dst temp) + ) + ) + ) + +(defmacro vftoi12.xyzw (dst src) + "convert to 20.12 integer. This does the multiply while the number is still + a float. This will have issues for very large floats, but it seems like this + is how PCSX2 does it as well, so maybe it's right? + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 4096.0) + (.mul.x.vf temp ,src temp) + (.ftoi.vf ,dst temp) + ) + ) + ) + +(defmacro vftoi15.xyzw (dst src) + "convert to 17.15 integer. 
This does the multiply while the number is still + a float. This will have issues for very large floats, but it seems like this + is how PCSX2 does it as well, so maybe it's right? + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 32768.0) + (.mul.x.vf temp ,src temp) + (.ftoi.vf ,dst temp) + ) + ) + ) + +(defmacro vitof4.xyzw (dst src) + "convert from a 28.4 integer. The integer lanes are converted to floats first + with .itof.vf, and the float result is then scaled by 0.0625 (1/16) to + account for the 4 fractional bits. + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 0.0625) + (.itof.vf ,dst ,src) + (.mul.x.vf ,dst ,dst temp) + ) + ) + ) + +(defmacro vitof12.xyzw (dst src) + "convert from a 20.12 integer. The integer lanes are converted to floats first + with .itof.vf, and the float result is then scaled by 0.000244140625 (1/4096) to + account for the 12 fractional bits. + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! temp 0.000244140625) + (.itof.vf ,dst ,src) + (.mul.x.vf ,dst ,dst temp) + ) + ) + ) + +(defmacro vitof15.xyzw (dst src) + "convert from a 17.15 integer. The integer lanes are converted to floats first + with .itof.vf, and the float result is then scaled by 0.000030517578125 (1/32768) to + account for the 15 fractional bits. + NOTE: this is the only version of the instruction used in Jak 1, so we + don't need to worry about masks." + + `(begin + (rlet ((temp :class vf)) + (set! 
temp 0.000030517578125) + (.itof.vf ,dst ,src) + (.mul.x.vf ,dst ,dst temp) + ) + ) + ) diff --git a/test/decompiler/reference/engine/camera/math-camera_REF.gc b/test/decompiler/reference/engine/camera/math-camera_REF.gc index bd90beab52..2fe7706eb2 100644 --- a/test/decompiler/reference/engine/camera/math-camera_REF.gc +++ b/test/decompiler/reference/engine/camera/math-camera_REF.gc @@ -475,7 +475,6 @@ ;; definition for function transform-point-qword! ;; WARN: Inline assembly instruction marked with TODO - [TODO.VCLIP] ;; WARN: Unsupported inline assembly instruction kind - [cfc2.i v1, Clipping] -;; WARN: Inline assembly instruction marked with TODO - [TODO.VFTOI4] (defun transform-point-qword! ((arg0 vector4w) (arg1 vector)) (local-vars (v1-7 int)) (rlet ((acc :class vf) @@ -514,7 +513,7 @@ (.mul.vf vf23 vf23 Q) (.add.vf vf28 vf28 vf30) (.max.x.vf vf28 vf28 vf0 :mask #b1000) - (TODO.VFTOI4 vf28 vf28) + (vftoi4.xyzw vf28 vf28) (.svf (&-> arg0 quad) vf28) (zero? (logand v1-7 63)) ) diff --git a/test/decompiler/reference/engine/collide/main-collide_REF.gc b/test/decompiler/reference/engine/collide/main-collide_REF.gc new file mode 100644 index 0000000000..18f4e1e3bc --- /dev/null +++ b/test/decompiler/reference/engine/collide/main-collide_REF.gc @@ -0,0 +1,231 @@ +;;-*-Lisp-*- +(in-package goal) + +;; definition for function drawable-sphere-box-intersect? +;; WARN: Function may read a register that is not set: f31 +;; Used lq/sq +(defun drawable-sphere-box-intersect? ((arg0 drawable) (arg1 bounding-box4w)) + (local-vars + (r0-0 int) + (r0-1 int) + (r0-2 uint128) + (r0-3 int) + (v1-1 uint128) + (v1-2 uint128) + (v1-3 uint128) + (a0-1 uint128) + (a1-2 uint128) + (a2-0 uint128) + (f31-0 none) + ) + (rlet ((vf1 :class vf) + (vf2 :class vf) + (vf3 :class vf) + (vf4 :class vf) + ) + (nop!) + (nop!) 
+ (.lvf vf1 (&-> arg0 bsphere quad)) + (.add.w.vf vf2 vf1 vf1 :mask #b111) + (let ((v1-0 (-> arg1 min quad))) + (.sub.w.vf vf1 vf1 vf1 :mask #b111) + (let ((a1-1 (-> arg1 max quad))) + (.ftoi.vf vf4 vf2) + (nop!) + (.ftoi.vf vf3 vf1) + (nop!) + (.mov a0-1 vf4) + (nop!) + (.mov a2-0 vf3) + (nop!) + (.pcgtw a1-2 a2-0 a1-1) + ) + (.mov r0-0 f31-0) + (.pcgtw v1-1 v1-0 a0-1) + ) + (.mov r0-1 f31-0) + (.por v1-2 a1-2 v1-1) + (.mov r0-2 f31-0) + (.ppach v1-3 r0-2 v1-2) + (.mov r0-3 f31-0) + (let ((v1-4 (shl (the-as int v1-3) 16))) + (nop!) + (zero? v1-4) + ) + ) + ) + +;; definition for function instance-sphere-box-intersect? +;; WARN: Function may read a register that is not set: f31 +;; Used lq/sq +(defun + instance-sphere-box-intersect? + ((arg0 drawable) (arg1 instance-tie) (arg2 bounding-box4w)) + (local-vars + (r0-0 uint128) + (r0-1 int) + (r0-2 uint128) + (r0-3 int) + (r0-4 int) + (r0-5 uint128) + (r0-6 int) + (v1-3 uint128) + (v1-4 uint128) + (v1-5 uint128) + (a0-2 uint128) + (a1-2 uint128) + (a2-1 uint128) + (a3-1 uint128) + (a3-3 uint128) + (a3-4 uint128) + (t0-1 uint128) + (t0-2 uint128) + (t1-0 uint128) + (t2-1 uint128) + (t2-2 uint128) + (f31-0 none) + ) + (rlet ((acc :class vf) + (vf0 :class vf) + (vf1 :class vf) + (vf10 :class vf) + (vf2 :class vf) + (vf3 :class vf) + (vf4 :class vf) + (vf5 :class vf) + (vf6 :class vf) + (vf7 :class vf) + (vf8 :class vf) + (vf9 :class vf) + ) + (init-vf0-vector) + (nop!) + (let ((v1-0 (-> arg1 max-scale))) + (nop!) + (let ((a3-0 (the-as uint128 (-> arg1 origin vector4h 3 long)))) + (nop!) 
+ (let ((t2-0 (the-as uint128 (-> arg1 origin vector4h 0 long)))) + (.pextlh a3-1 a3-0 r0-0) + (let ((t0-0 (the-as uint128 (-> arg1 origin vector4h 1 long)))) + (.pw.sra t1-0 a3-1 10) + (let ((a3-2 (the-as uint128 (-> arg1 origin vector4h 2 long)))) + (.pextlh t2-1 t2-0 r0-0) + (.mov r0-1 f31-0) + (.pw.sra t2-2 t2-1 16) + (.mov r0-2 f31-0) + (.pextlh t0-1 t0-0 r0-2) + (.mov vf8 t1-0) + (.pw.sra t0-2 t0-1 16) + (.mov vf5 t2-2) + (.pextlh a3-3 a3-2 r0-2) + ) + ) + ) + ) + (.mov vf6 t0-2) + (.pw.sra a3-4 a3-3 16) + (.lvf vf9 (&-> arg1 bsphere quad)) + (nop!) + (.mov vf7 a3-4) + (nop!) + (.mov vf10 v1-0) + ) + (.itof.vf vf8 vf8) + (nop!) + (vitof12.xyzw vf5 vf5) + (nop!) + (vitof12.xyzw vf6 vf6) + (nop!) + (vitof12.xyzw vf7 vf7) + (nop!) + (.add.vf vf8 vf8 vf9 :mask #b111) + (nop!) + (nop!) + (.lvf vf9 (&-> arg0 bsphere quad)) + (vitof12.xyzw vf10 vf10) + (nop!) + (.mul.w.vf vf10 vf10 vf9 :mask #b1) + (nop!) + (.mul.x.vf acc vf5 vf9) + (nop!) + (.add.mul.y.vf acc vf6 vf9 acc) + (let ((v1-2 (-> arg2 min quad))) + (.add.mul.z.vf acc vf7 vf9 acc) + (let ((a1-1 (-> arg2 max quad))) + (.add.mul.w.vf vf1 vf8 vf0 acc) + (nop!) + (.add.x.vf vf2 vf1 vf10 :mask #b111) + (nop!) + (.sub.x.vf vf1 vf1 vf10 :mask #b111) + (nop!) + (.ftoi.vf vf4 vf2) + (nop!) + (.ftoi.vf vf3 vf1) + (nop!) + (.mov a0-2 vf4) + (nop!) + (.mov a2-1 vf3) + (nop!) + (.pcgtw a1-2 a2-1 a1-1) + ) + (.mov r0-3 f31-0) + (.pcgtw v1-3 v1-2 a0-2) + ) + (.mov r0-4 f31-0) + (.por v1-4 a1-2 v1-3) + (.mov r0-5 f31-0) + (.ppach v1-5 r0-5 v1-4) + (.mov r0-6 f31-0) + (let ((v1-6 (shl (the-as int v1-5) 16))) + (nop!) + (zero? v1-6) + ) + ) + ) + +;; definition for function instance-tfragment-add-debug-sphere +;; Used lq/sq +(defun instance-tfragment-add-debug-sphere ((arg0 drawable) (arg1 instance-tie)) + (local-vars (r0-0 uint128) (v1-1 uint128) (v1-2 uint128) (a3-0 float)) + (rlet ((vf0 :class vf) + (vf10 :class vf) + (vf11 :class vf) + (vf12 :class vf) + (vf9 :class vf) + ) + (init-vf0-vector) + (nop!) 
+ (let ((v1-0 (the-as uint128 (-> arg1 origin vector4h 3 long)))) + (.pextlh v1-1 v1-0 r0-0) + ) + (.lvf vf9 (&-> arg0 bsphere quad)) + (.pw.sra v1-2 v1-1 10) + (.lvf vf10 (&-> arg1 bsphere quad)) + (nop!) + (.mov vf12 v1-2) + (.itof.vf vf12 vf12) + (nop!) + (.add.vf vf10 vf10 vf12 :mask #b111) + (nop!) + (.add.vf vf9 vf9 vf10 :mask #b111) + (nop!) + (.add.w.vf vf11 vf0 vf9 :mask #b1) + (nop!) + (.mov a3-0 vf11) + (nop!) + (let ((a2-0 (new-stack-vector0))) + (.svf (&-> a2-0 quad) vf9) + (add-debug-sphere + #t + (bucket-id debug-draw0) + a2-0 + a3-0 + (new 'static 'rgba :r #x80 :g #x80 :b #x80 :a #x80) + ) + ) + ) + ) + + + + diff --git a/test/goalc/test_with_game.cpp b/test/goalc/test_with_game.cpp index 99818a5f37..1d56ed88ec 100644 --- a/test/goalc/test_with_game.cpp +++ b/test/goalc/test_with_game.cpp @@ -667,6 +667,8 @@ TEST_F(WithGameTests, I128Simple) { "12344321\n"}); } +// TODO - add tests + TEST_F(WithGameTests, Pextlw) { runner.run_static_test(env, testCategory, "test-pextlw.gc", {"#x07060504171615140302010013121110\n" diff --git a/test/test_emitter_avx.cpp b/test/test_emitter_avx.cpp index c6664ea9da..794cc6963d 100644 --- a/test/test_emitter_avx.cpp +++ b/test/test_emitter_avx.cpp @@ -373,6 +373,126 @@ TEST(EmitterAVX, VPSLLD) { EXPECT_EQ(tester.dump_to_hex_string(true), "C5E172F403C4C16172F604C59172F405C4C11172F606"); } +TEST(EmitterAVX, VPCMPEQB) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::parallel_compare_e_b(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_b(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_e_b(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_b(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::parallel_compare_e_b(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_b(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_e_b(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + 
tester.emit(IGen::parallel_compare_e_b(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E174DBC4C16174DDC59174DBC4C11174DDC56174EBC4416174EDC51174EBC4411174ED"); +} + +TEST(EmitterAVX, VPCMPEQW) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::parallel_compare_e_h(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_h(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_e_h(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_h(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::parallel_compare_e_h(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_h(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_e_h(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_h(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E175DBC4C16175DDC59175DBC4C11175DDC56175EBC4416175EDC51175EBC4411175ED"); +} + +TEST(EmitterAVX, VPCMPEQD) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::parallel_compare_e_w(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_w(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_e_w(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_w(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::parallel_compare_e_w(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_w(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_e_w(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_e_w(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E176DBC4C16176DDC59176DBC4C11176DDC56176EBC4416176EDC51176EBC4411176ED"); +} + +TEST(EmitterAVX, VPCMPGTB) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::parallel_compare_gt_b(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + 
tester.emit(IGen::parallel_compare_gt_b(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_gt_b(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_b(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::parallel_compare_gt_b(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_b(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_gt_b(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_b(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E164DBC4C16164DDC59164DBC4C11164DDC56164EBC4416164EDC51164EBC4411164ED"); +} + +TEST(EmitterAVX, VPCMPGTW) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::parallel_compare_gt_h(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_h(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_gt_h(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_h(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::parallel_compare_gt_h(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_h(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_gt_h(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_h(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E165DBC4C16165DDC59165DBC4C11165DDC56165EBC4416165EDC51165EBC4411165ED"); +} + +TEST(EmitterAVX, VPCMPGTD) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::parallel_compare_gt_w(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_w(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::parallel_compare_gt_w(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_w(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::parallel_compare_gt_w(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_w(XMM0 + 13, XMM0 + 3, 
XMM0 + 13)); + tester.emit(IGen::parallel_compare_gt_w(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::parallel_compare_gt_w(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E166DBC4C16166DDC59166DBC4C11166DDC56166EBC4416166EDC51166EBC4411166ED"); +} + +TEST(EmitterAVX, VPUNPCKLBW) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::pextlb_swapped(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::pextlb_swapped(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::pextlb_swapped(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::pextlb_swapped(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::pextlb_swapped(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::pextlb_swapped(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::pextlb_swapped(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::pextlb_swapped(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E160DBC4C16160DDC59160DBC4C11160DDC56160EBC4416160EDC51160EBC4411160ED"); +} + +TEST(EmitterAVX, VPUNPCKLWD) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::pextlh_swapped(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::pextlh_swapped(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::pextlh_swapped(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::pextlh_swapped(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::pextlh_swapped(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::pextlh_swapped(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::pextlh_swapped(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::pextlh_swapped(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E161DBC4C16161DDC59161DBC4C11161DDC56161EBC4416161EDC51161EBC4411161ED"); +} + TEST(EmitterAVX, VPUNPCKLDQ) { CodeTester tester; tester.init_code_buffer(1024); @@ -388,6 +508,36 @@ TEST(EmitterAVX, VPUNPCKLDQ) { 
"C5E162DBC4C16162DDC59162DBC4C11162DDC56162EBC4416162EDC51162EBC4411162ED"); } +TEST(EmitterAVX, VPUNPCKHBW) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::pextub_swapped(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::pextub_swapped(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::pextub_swapped(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::pextub_swapped(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::pextub_swapped(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::pextub_swapped(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::pextub_swapped(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::pextub_swapped(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E168DBC4C16168DDC59168DBC4C11168DDC56168EBC4416168EDC51168EBC4411168ED"); +} + +TEST(EmitterAVX, VPUNPCKHWD) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::pextuh_swapped(XMM0 + 3, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::pextuh_swapped(XMM0 + 3, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::pextuh_swapped(XMM0 + 3, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::pextuh_swapped(XMM0 + 3, XMM0 + 13, XMM0 + 13)); + tester.emit(IGen::pextuh_swapped(XMM0 + 13, XMM0 + 3, XMM0 + 3)); + tester.emit(IGen::pextuh_swapped(XMM0 + 13, XMM0 + 3, XMM0 + 13)); + tester.emit(IGen::pextuh_swapped(XMM0 + 13, XMM0 + 13, XMM0 + 3)); + tester.emit(IGen::pextuh_swapped(XMM0 + 13, XMM0 + 13, XMM0 + 13)); + EXPECT_EQ(tester.dump_to_hex_string(true), + "C5E169DBC4C16169DDC59169DBC4C11169DDC56169EBC4416169EDC51169EBC4411169ED"); +} + TEST(EmitterAVX, VPUNPCKHDQ) { CodeTester tester; tester.init_code_buffer(1024); @@ -433,21 +583,6 @@ TEST(EmitterAVX, VPUNPCKHQDQ) { "C5E16DDBC4C1616DDDC5916DDBC4C1116DDDC5616DEBC441616DEDC5116DEBC441116DED"); } -TEST(EmitterAVX, VPCMPEQD) { - CodeTester tester; - tester.init_code_buffer(1024); - tester.emit(IGen::pceqw(XMM0 + 3, XMM0 + 3, XMM0 + 3)); - tester.emit(IGen::pceqw(XMM0 + 3, 
XMM0 + 3, XMM0 + 13)); - tester.emit(IGen::pceqw(XMM0 + 3, XMM0 + 13, XMM0 + 3)); - tester.emit(IGen::pceqw(XMM0 + 3, XMM0 + 13, XMM0 + 13)); - tester.emit(IGen::pceqw(XMM0 + 13, XMM0 + 3, XMM0 + 3)); - tester.emit(IGen::pceqw(XMM0 + 13, XMM0 + 3, XMM0 + 13)); - tester.emit(IGen::pceqw(XMM0 + 13, XMM0 + 13, XMM0 + 3)); - tester.emit(IGen::pceqw(XMM0 + 13, XMM0 + 13, XMM0 + 13)); - EXPECT_EQ(tester.dump_to_hex_string(true), - "C5E176DBC4C16176DDC59176DBC4C11176DDC56176EBC4416176EDC51176EBC4411176ED"); -} - TEST(EmitterAVX, VPSRLDQ) { CodeTester tester; tester.init_code_buffer(1024); @@ -566,4 +701,4 @@ TEST(EmitterAVX, VPAND) { tester.emit(IGen::parallel_bitwise_and(XMM0 + 13, XMM0 + 13, XMM0 + 13)); EXPECT_EQ(tester.dump_to_hex_string(true), "C5E1DBDBC4C161DBDDC591DBDBC4C111DBDDC561DBEBC44161DBEDC511DBEBC44111DBED"); -} \ No newline at end of file +}