diff --git a/doc/emitter.md b/doc/emitter.md new file mode 100644 index 0000000000..c98cc6ea4d --- /dev/null +++ b/doc/emitter.md @@ -0,0 +1,24 @@ +# Emitter +x86-64 has a lot of instructions. They are described in Volume 2 of the 5 Volume "Intel® 64 and IA-32 Architectures Software Developer’s Manual". Just this volume alone is over 2000 pages, which would take forever to fully implement. As a result, we will use only a subset of these instructions. This is the rough plan: + +- Most instructions like `add` will only be implemented with `r64 r64` versions. +- To accomplish something like `add rax, 1`, we will use a temporary register `X` + - `mov X, 1` + - `add rax, X` + - The constant propagation system will be able to provide enough information that we could eventually use `add r64 immX` and similar if needed. + - Register allocation should handle the case `(set! x (+ 3 y))` as: + - `mov x, 3` + - `add x, y` + - but `(set! x (+ y 3))`, in cases where `y` is needed after and `x` can't take its place, will become the inefficient + - `mov x, y` + - `mov rtemp, 3` + - `add x, rtemp` +- Loading constants into registers will be done efficiently, using the same strategy used by modern versions of `gcc` and `clang`. +- Memory access will be done in the form `mov rdest, [roff + raddr]` where `roff` is the offset register. Doing memory access in this form was found to be much faster in a simple benchmark test. +- Memory access to the stack will have an extra `sub` and more complicated dereference. GOAL code seems to avoid using the stack in most places, and I suspect the programmers attempted to avoid stack spills. + - `mov rdest, rsp` : coloring move for upcoming subtract + - `sub rdest, roff` : convert real pointer to GOAL pointer + - `mov rdest, [rdest + roff + variable_offset]` : access memory through normal GOAL deref. + - Note - we should check that the register allocator gets this right always, and eliminates moves and avoids using a temporary register. 
+ - Again, the constant propagation should give us enough information, if we ever want/need to implement more efficient `mov rdest, [rsp + variable_offset]` type instructions. +- Memory access to static data should use `rip` addressing, like `mov rdest, [rip + offset]`. And creating pointers to static data could be `lea rdest, [rip - roff + offset]` \ No newline at end of file diff --git a/doc/registers.md b/doc/registers.md new file mode 100644 index 0000000000..97d5280b8d --- /dev/null +++ b/doc/registers.md @@ -0,0 +1,111 @@ +## Registers +Although modern computers are much faster than the PS2, and we could probably get away with a really inefficient register allocation scheme, I think it's worth it to get this right. + + +## Register differences between MIPS and x86-64 +The PS2's MIPS processor has these categories of register: +- General Purpose. They are 128-bit, but usually only lower 64 bits are used. 32 registers, each 128-bits. +- Floating point registers. 32 registers, each for a 32-bit float. +- Vector float registers. 32 registers, each for 4x 32-bit floats. Used only in inline assembly +- `vi` registers. 16 registers, each a 16-bit integer. Used very rarely in inline assembly + +There are also some control/special registers too (`Q`, `R`...), but code using these will be manually ported. + +In comparison, x86-64 has far fewer registers: +- 16 General Purpose. Each 64-bits +- 16 `xmm` registers. 128-bits, and can store either 128-bit integers or 4x 32-bit floats + +Here is the mapping: +- MIPS GPR (lower 64 bits only) - x86-64 GPR +- MIPS GPR (128-bits, only special cases) - x86-64 `xmm` +- MIPS floating point - x86-64 `xmm` (lower 32-bits) +- MIPS vector float - x86-64 `xmm` (packed single) +- MIPS `vi` - manually handled?? 
+ +Here is the MIPS GPR map +- `r0` or `zero` : always zero +- `r1` or `at`: assembler temporary, not saved, not used by compiler +- `r2` or `v0`: return value, not saved +- `r3` or `v1`: not saved +- `r4` or `a0`: not saved, argument 0 +- `r5` or `a1`: not saved, argument 1 +- `r6` or `a2`: not saved, argument 2 +- `r7` or `a3`: not saved, argument 3 +- `r8` or `t0`: not saved, argument 4 +- `r9` or `t1`: not saved, argument 5 +- `r10` or `t2`: not saved, argument 6 +- `r11` or `t3`: not saved, argument 7 +- `r12` or `t4`: not saved +- `r13` or `t5`: not saved +- `r14` or `t6`: not saved +- `r15` or `t7`: not saved +- `r16` or `s0`: saved +- `r17` or `s1`: saved +- `r18` or `s2`: saved +- `r19` or `s3`: saved +- `r20` or `s4`: saved +- `r21` or `s5`: saved +- `r22` or `s6`: saved, process pointer +- `r23` or `s7`: saved, symbol pointer +- `r24` or `t8`: not saved +- `r25` or `t9`: function call pointer +- `r26` or `k0`: kernel reserved (unused) +- `r27` or `k1`: kernel reserved (unused) +- `r28` or `gp`: saved +- `r29` or `sp`: stack pointer +- `r30` or `fp`: current function pointer +- `r31` or `ra`: return address pointer + + +And the x86-64 GPR map +- `rax`: return value +- `rcx`: argument 3 +- `rdx`: argument 2 +- `rbx`: saved +- `rsp`: stack pointer +- `rbp`: saved +- `rsi`: argument 1 +- `rdi`: argument 0 +- `r8`: argument 4 +- `r9`: argument 5 +- `r10`: argument 6, saved if not argument +- `r11`: argument 7, saved if not argument +- `r12`: saved +- `r13`: process pointer +- `r14`: symbol table +- `r15`: offset pointer + + +### Plan for Memory Access +The PS2 uses 32-bit pointers, and changing the pointer size is likely to introduce bugs, so we will keep using 32-bit pointers. Also, GOAL has some hardcoded checks on the value for pointers, so we need to make sure the memory appears to the program at the correct address. + +To do this, we have separate "GOAL Pointers" and "real pointers". 
The "real pointers" are just normal x86-64 pointers, and the "GOAL Pointer" is an offset into a main memory array. A "real pointer" to the main memory array is stored in `r15` (offset pointer) when GOAL code is executing, and the GOAL compiler will automatically add this to all memory accesses. + +The overhead from doing this is not as bad as you might expect - x86 has nice addressing modes (Scale Index Base) which are quite fast, and don't require the use of temporary registers. If this does turn out to be much slower than I expect, we can introduce the concept of real pointers in GOAL code, and use them in places where we are limited in accessing memory. + +The main RAM is mapped at `0x0` on the PS2, with the first 1 MB reserved for the kernel. We should make sure that the first 1 MB of GOAL main memory will cause a segfault if read/written/executed, to catch null pointer bugs. + +In the C Kernel code, the `r15` pointer doesn't exist. Instead, `g_ee_main_memory` is a global which points to the beginning of GOAL main memory. The `Ptr` template class takes care of converting GOAL and C++ pointers in a convenient way, and catches null pointer access. + +The GOAL stack pointer should likely be a real pointer, for performance reasons. This makes pushing/popping/calling/returning/accessing stack variables much faster, with the only cost being getting a GOAL stack pointer requiring some extra work. The stack pointer's value is read/written extremely rarely, so this seems like a good tradeoff. + +The other registers are less clear. The process pointer can probably be a real pointer. But the symbol table could go a few ways: +1. Make it a real pointer. Symbol value access is fast, but comparison against false requires two extra operations. +2. Make it a GOAL pointer. Symbol value access requires more complicated addressing modes, but comparison against false is fast. + +Right now I'm leaning toward 1, but making it a configurable option in case I'm wrong. 
It should only be a change in a few places (emitter + where it's set up in the runtime). + +### Plan for Function Call and Arguments +In GOAL for MIPS, function calls are weird. Functions are always called by register using `t9`. There seems to be a different register allocator for function pointers, as nested function calls have really wacky register allocation. In GOAL-x86-64, this restriction will be removed, and a function can be called from any register. (see next section for why we can do this) + +Unfortunately, GOAL's 128-bit function arguments present a big challenge. When calling a function, we can't know if the function we're calling is expecting an integer, float, or 128-bit integer. In fact, the caller may not even know if it has an integer, float, or 128-bit integer. The easy and foolproof way to get this right is to use 128-bit `xmm` registers for all arguments and return values, but this will cause a massive performance hit and increase code size, as we'll have to move values between register types constantly. The current plan is this: + +- Floats go in GPRs for arguments/return values. GOAL does this too, and takes the hit of converting between registers as well. Probably the impact on a modern CPU is even worse, but we can live with it. +- We'll compromise + + +### Plan for Static Data + +### Plan for Memory + +### Other details diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index 05e0d99744..b928900c41 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -7,7 +7,6 @@ set(CMAKE_CXX_FLAGS "-O0 -ggdb -Wall \ enable_language(ASM_NASM) set(RUNTIME_SOURCE - main.cpp runtime.cpp system/SystemThread.cpp system/IOP_Kernel.cpp @@ -49,7 +48,7 @@ set(RUNTIME_SOURCE overlord/stream.cpp) # the runtime should be built without any static/dynamic libraries. -add_executable(gk ${RUNTIME_SOURCE}) +add_executable(gk ${RUNTIME_SOURCE} main.cpp) # we also build a runtime library for testing. 
This version is likely unable to call GOAL code correctly, but # can be used to test other things. diff --git a/game/kernel/fileio.cpp b/game/kernel/fileio.cpp index 585748bf0d..c5ccbb3e43 100644 --- a/game/kernel/fileio.cpp +++ b/game/kernel/fileio.cpp @@ -199,6 +199,7 @@ char* basename_goal(char* s) { } } + /* Original code, has memory bug. // back up... for (;;) { if (pt < input) { @@ -211,6 +212,20 @@ char* basename_goal(char* s) { return pt + 1; // and return one past } } + */ + + // back up... + for (;;) { + if (pt <= input) { + return input; + } + pt--; + char c = *pt; + // until we hit a slash. + if (c == '\\' || c == '/') { // slashes + return pt + 1; // and return one past + } + } } /*! diff --git a/game/system/Deci2Server.h b/game/system/Deci2Server.h index 1bdb8c2d06..d3db7d96ca 100644 --- a/game/system/Deci2Server.h +++ b/game/system/Deci2Server.h @@ -34,9 +34,9 @@ class Deci2Server { void accept_thread_func(); bool kill_accept_thread = false; char* buffer = nullptr; - int server_fd; + int server_fd = -1; sockaddr_in addr; - int new_sock; + int new_sock = -1; bool server_initialized = false; bool accept_thread_running = false; bool server_connected = false; diff --git a/goalc/emitter/CMakeLists.txt b/goalc/emitter/CMakeLists.txt index fde45f4cb7..1d6cc0dc8f 100644 --- a/goalc/emitter/CMakeLists.txt +++ b/goalc/emitter/CMakeLists.txt @@ -1,3 +1,3 @@ add_library(emitter - CodeTester.cpp - registers.cpp) \ No newline at end of file + Register.cpp + CodeTester.cpp) \ No newline at end of file diff --git a/goalc/emitter/CodeTester.cpp b/goalc/emitter/CodeTester.cpp index f722cadad8..da5bfab9ae 100644 --- a/goalc/emitter/CodeTester.cpp +++ b/goalc/emitter/CodeTester.cpp @@ -1,40 +1,61 @@ +/*! + * @file CodeTester.cpp + * The CodeTester is a utility to run the output of the compiler as part of a unit test. + * This is effective for tests which try all combinations of registers, etc. 
+ * + * The CodeTester can't be used for tests requiring the full GOAL language/linking. + */ + #include -#include #include "CodeTester.h" -#include "Instruction.h" #include "IGen.h" -namespace goal { +namespace emitter { + +CodeTester::CodeTester() : m_info(RegisterInfo::make_register_info()) {} -std::string CodeTester::dump_to_hex_string() { +/*! + * Convert to a string for comparison against an assembler or tests. + */ +std::string CodeTester::dump_to_hex_string(bool nospace) { std::string result; char buff[32]; for (int i = 0; i < code_buffer_size; i++) { - sprintf(buff, "%02x ", code_buffer[i]); + if (nospace) { + sprintf(buff, "%02X", code_buffer[i]); + } else { + sprintf(buff, "%02x ", code_buffer[i]); + } + result += buff; } // remove trailing space - if (!result.empty()) { + if (!nospace && !result.empty()) { result.pop_back(); } return result; } +/*! + * Add an instruction to the buffer. + */ void CodeTester::emit(const Instruction& instr) { code_buffer_size += instr.emit(code_buffer + code_buffer_size); assert(code_buffer_size <= code_buffer_capacity); } -void CodeTester::emit_set_gpr_as_return(X86R gpr) { - assert(is_gpr(gpr)); - emit(IGen::mov_gpr64_gpr64(RAX, gpr)); -} - +/*! + * Add a return instruction to the buffer. + */ void CodeTester::emit_return() { emit(IGen::ret()); } +/*! + * Pop all GPRs off of the stack. Optionally exclude rax. + * Pops RSP always, which is weird, but doesn't cause issues. + */ void CodeTester::emit_pop_all_gprs(bool exclude_rax) { for (int i = 16; i-- > 0;) { if (i != RAX || !exclude_rax) { @@ -43,6 +64,10 @@ void CodeTester::emit_pop_all_gprs(bool exclude_rax) { } } +/*! + * Push all GPRs onto the stack. Optionally exclude RAX. + * Pushes RSP always, which is weird, but doesn't cause issues. + */ void CodeTester::emit_push_all_gprs(bool exclude_rax) { for (int i = 0; i < 16; i++) { if (i != RAX || !exclude_rax) { @@ -51,14 +76,53 @@ void CodeTester::emit_push_all_gprs(bool exclude_rax) { } } +/*! 
+ * Push all xmm registers (all 128-bits) to the stack. + */ +void CodeTester::emit_push_all_xmms() { + emit(IGen::sub_gpr64_imm8s(RSP, 8)); + for (int i = 0; i < 16; i++) { + emit(IGen::sub_gpr64_imm8s(RSP, 16)); + emit(IGen::store128_gpr64_xmm128(RSP, XMM0 + i)); + } +} + +/*! + * Pop all xmm registers (all 128-bits) from the stack, in reverse push order (xmm15 first). + */ +void CodeTester::emit_pop_all_xmms() { + for (int i = 16; i-- > 0;) { + emit(IGen::load128_xmm128_gpr64(XMM0 + i, RSP)); + emit(IGen::add_gpr64_imm8s(RSP, 16)); + } + emit(IGen::add_gpr64_imm8s(RSP, 8)); +} + +/*! + * Remove everything from the code buffer + */ void CodeTester::clear() { code_buffer_size = 0; } +/*! + * Execute the buffered code with no arguments, return the value of RAX. + */ u64 CodeTester::execute() { return ((u64(*)())code_buffer)(); } +/*! + * Execute code buffer with arguments. Use get_c_abi_arg to figure out which registers the + * arguments will appear in (will handle windows/linux differences) + */ +u64 CodeTester::execute(u64 in0, u64 in1, u64 in2, u64 in3) { + return ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3); +} + +/*! + * Allocate a code buffer of the given size. + */ void CodeTester::init_code_buffer(int capacity) { code_buffer = (u8*)mmap(nullptr, capacity, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); @@ -76,5 +140,4 @@ CodeTester::~CodeTester() { munmap(code_buffer, code_buffer_capacity); } } - -} // namespace goal \ No newline at end of file +} // namespace emitter \ No newline at end of file diff --git a/goalc/emitter/CodeTester.h b/goalc/emitter/CodeTester.h index 4bd7756c06..ee174ac659 100644 --- a/goalc/emitter/CodeTester.h +++ b/goalc/emitter/CodeTester.h @@ -1,28 +1,123 @@ /*! - * @file CodeTester - * CodeTester is a utility which allows small segments of x86 code to be run, for the purpose of - * testing the compiler's code emitter. It is not suitable for testing compiled GOAL code. 
+ * @file CodeTester.h + * The CodeTester is a utility to run the output of the compiler as part of a unit test. + * This is effective for tests which try all combinations of registers, etc. + * + * The CodeTester can't be used for tests requiring the full GOAL language/linking. */ -#ifndef JAK1_CODETESTER_H -#define JAK1_CODETESTER_H +#ifndef JAK_CODETESTER_H +#define JAK_CODETESTER_H #include #include "common/common_types.h" -#include "registers.h" +#include "Register.h" #include "Instruction.h" -namespace goal { +namespace emitter { class CodeTester { public: - std::string dump_to_hex_string(); + CodeTester(); + std::string dump_to_hex_string(bool nospace = false); void init_code_buffer(int capacity); void emit_push_all_gprs(bool exclude_rax = false); void emit_pop_all_gprs(bool exclude_rax = false); + void emit_push_all_xmms(); + void emit_pop_all_xmms(); void emit_return(); - void emit_set_gpr_as_return(X86R gpr); void emit(const Instruction& instr); u64 execute(); + u64 execute(u64 in0, u64 in1, u64 in2, u64 in3); + + /*! + * Execute the function, get the return value in RAX, convert to a T, and return it. + */ + template + T execute_ret(u64 in0, u64 in1, u64 in2, u64 in3) { + u64 result_u64 = ((u64(*)(u64, u64, u64, u64))code_buffer)(in0, in1, in2, in3); + T result_T; + memcpy(&result_T, &result_u64, sizeof(T)); + return result_T; + } + + /*! + * Add data to the code buffer. + */ + template + int emit_data(T x) { + auto ret = code_buffer_size; + assert(int(sizeof(T)) + code_buffer_size <= code_buffer_capacity); + memcpy(code_buffer + code_buffer_size, &x, sizeof(T)); + code_buffer_size += sizeof(T); + return ret; + } + + /*! + * Should allow emitter tests which run code to do the right thing on windows. 
+ */ + Register get_c_abi_arg_reg(int i) { +#ifdef _WIN32 + switch (i) { + case 0: + return RCX; + case 1: + return RDX; + case 2: + return R8; + case 3: + return R9; + default: + assert(false); + } +#else + switch (i) { + case 0: + return RDI; + case 1: + return RSI; + case 2: + return RDX; + case 3: + return RCX; + default: + assert(false); + } +#endif + } + + /*! + * Get the name of the given register, for debugging. + */ + std::string reg_name(Register x) { return m_info.get_info(x).name; } + + /*! + * Get number of bytes currently in use (offset of the next thing to be added) + */ + int size() const { return code_buffer_size; } + const u8* data() const { return code_buffer; } + + /*! + * Write over existing data at the given offset. + */ + template + void write(T x, int at) { + assert(at >= 0); + assert(int(sizeof(T)) + at <= code_buffer_capacity); + memcpy(code_buffer + at, &x, sizeof(T)); + } + + /*! + * Read existing data at the given offset. + */ + template + T read(int at) { + assert(at >= 0); + assert(int(sizeof(T)) + at <= code_buffer_capacity); + T result; + memcpy(&result, code_buffer + at, sizeof(T)); + return result; + } + void clear(); ~CodeTester(); @@ -30,7 +125,7 @@ class CodeTester { int code_buffer_size = 0; int code_buffer_capacity = 0; u8* code_buffer = nullptr; + RegisterInfo m_info; }; -} // namespace goal - -#endif // JAK1_CODETESTER_H +} // namespace emitter +#endif // JAK_CODETESTER_H diff --git a/goalc/emitter/IGen.h b/goalc/emitter/IGen.h index eea12519e1..978329adf6 100644 --- a/goalc/emitter/IGen.h +++ b/goalc/emitter/IGen.h @@ -1,56 +1,58 @@ -#ifndef JAK1_IGEN_H -#define JAK1_IGEN_H +#ifndef JAK_IGEN_H +#define JAK_IGEN_H -#include +#include +#include "Register.h" #include "Instruction.h" -#include "registers.h" -namespace goal { +namespace emitter { class IGen { public: //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; // MOVES //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; /*! - * mov gpr, gpr, 64 bits + * Move data from src to dst. 
Moves all 64-bits of the GPR. */ - static Instruction mov_gpr64_gpr64(uint8_t dst, uint8_t src) { - assert(is_gpr(dst)); - assert(is_gpr(src)); + static Instruction mov_gpr64_gpr64(Register dst, Register src) { + assert(dst.is_gpr()); + assert(src.is_gpr()); Instruction instr(0x89); - instr.set_modrm_and_rex(src, dst, 3, true); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); return instr; } /*! * Move a 64-bit constant into a register. */ - static Instruction mov_gpr64_u64(uint8_t dst, uint64_t val) { - assert(is_gpr(dst)); + static Instruction mov_gpr64_u64(Register dst, uint64_t val) { + assert(dst.is_gpr()); bool rex_b = false; - if (dst >= 8) { - dst -= 8; + auto dst_hw_id = dst.hw_id(); + if (dst_hw_id >= 8) { + dst_hw_id -= 8; rex_b = true; } - Instruction instr(0xb8 + dst); + Instruction instr(0xb8 + dst_hw_id); instr.set(REX(true, false, false, rex_b)); instr.set(Imm(8, val)); return instr; } /*! - * Move a 32-bit constant into a register. + * Move a 32-bit constant into a register. Zeros the upper 32 bits. */ - static Instruction mov_gpr64_u32(uint8_t dst, uint64_t val) { + static Instruction mov_gpr64_u32(Register dst, uint64_t val) { assert(val <= UINT32_MAX); - assert(is_gpr(dst)); + assert(dst.is_gpr()); + auto dst_hw_id = dst.hw_id(); bool rex_b = false; - if (dst >= 8) { - dst -= 8; + if (dst_hw_id >= 8) { + dst_hw_id -= 8; rex_b = true; } - Instruction instr(0xb8 + dst); + Instruction instr(0xb8 + dst_hw_id); if (rex_b) { instr.set(REX(false, false, false, rex_b)); } @@ -59,15 +61,15 @@ class IGen { } /*! - * Move a signed 32-bit constant into a register. + * Move a signed 32-bit constant into a register. Sign extends for the upper 32 bits. * When possible prefer mov_gpr64_u32. (use this only for negative values...) * This is always bigger than mov_gpr64_u32, but smaller than a mov_gpr_u64. 
*/ - static Instruction mov_gpr64_s32(uint8_t dst, int64_t val) { + static Instruction mov_gpr64_s32(Register dst, int64_t val) { assert(val >= INT32_MIN && val <= INT32_MAX); - assert(is_gpr(dst)); + assert(dst.is_gpr()); Instruction instr(0xc7); - instr.set_modrm_and_rex(0, dst, 3, true); + instr.set_modrm_and_rex(0, dst.hw_id(), 3, true); instr.set(Imm(4, val)); return instr; } @@ -75,27 +77,27 @@ class IGen { /*! * Move 32-bits of xmm to 32 bits of gpr (no sign extension). */ - static Instruction movd_gpr32_xmm32(uint8_t dst, uint8_t src) { - assert(is_gpr(dst)); - assert(is_xmm(src)); + static Instruction movd_gpr32_xmm32(Register dst, Register src) { + assert(dst.is_gpr()); + assert(src.is_xmm()); Instruction instr(0x66); instr.set_op2(0x0f); instr.set_op3(0x7e); - instr.set_modrm_and_rex(xmm_to_id(src), dst, 3, false); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, false); instr.swap_op0_rex(); return instr; } /*! - * Move 32-bits of gpr to 32-bits of xmm (no sign extenion) + * Move 32-bits of gpr to 32-bits of xmm (no sign extension) */ - static Instruction movd_xmm32_gpr32(uint8_t dst, uint8_t src) { - assert(is_xmm(dst)); - assert(is_gpr(src)); + static Instruction movd_xmm32_gpr32(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_gpr()); Instruction instr(0x66); instr.set_op2(0x0f); instr.set_op3(0x6e); - instr.set_modrm_and_rex(dst, xmm_to_id(src), 3, false); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); instr.swap_op0_rex(); return instr; } @@ -103,645 +105,1642 @@ class IGen { /*! 
* Move 32-bits between xmm's */ - static Instruction mov_xmm32_xmm32(uint8_t dst, uint8_t src) { - assert(is_xmm(dst)); - assert(is_xmm(src)); + static Instruction mov_xmm32_xmm32(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_xmm()); Instruction instr(0xf3); instr.set_op2(0x0f); instr.set_op3(0x10); - instr.set_modrm_and_rex(xmm_to_id(dst), xmm_to_id(src), 3, false); - return instr; - } - // - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // // LOADS n' STORES - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // - // /*! - // * Store 8-bits from register into a memory location that is the sum of a 64-bit register - // * and signed 32-bit offset. - // */ - // static Instruction store8_r64off32s_gpr8(uint8_t dst_reg, int32_t offset, uint8_t src_reg) { - // Instruction instr(0x88); - // instr.set_modrm_and_rex_for_addr(src_reg, dst_reg, 2, false); - // instr.set_disp(Imm(4, offset)); - // if (src_reg > int(X86R::RBX)) { - // instr.add_rex(); - // } - // return instr; - // } - // - // /*! - // * Store 16-bits from register into a memory location that is the sum of a 64-bit register - // * and signed 32-bit offset. - // */ - // static Instruction store16_r64off32s_gpr16(uint8_t dst_reg, int32_t offset, uint8_t src_reg) { - // Instruction instr(0x66); - // instr.set_op2(0x89); - // instr.set_modrm_and_rex_for_addr(src_reg, dst_reg, 2, false); - // instr.set_disp(Imm(4, offset)); - // return instr; - // } - // - // /*! - // * Store 32-bits from register into a memory location that is the sum of a 64-bit register - // * and signed 32-bit offset. - // */ - // static Instruction store32_r64off32s_gpr32(uint8_t dst_reg, int32_t offset, uint8_t src_reg) { - // Instruction instr(0x89); - // instr.set_modrm_and_rex_for_addr(src_reg, dst_reg, 2, false); - // instr.set_disp(Imm(4, offset)); - // return instr; - // } - // - // /*! - // * Store 64-bits from gpr into memory located at 64-bit reg + 32-bit signed offset. 
- // */ - // static Instruction store64_r64off32s_gpr64(uint8_t dst_reg, int32_t offset, uint8_t src_reg) { - // Instruction instr(0x89); - // instr.set_modrm_rex_sib_for_reg_reg_disp32(src_reg, 2, dst_reg, true); - // instr.set_disp(Imm(4, offset)); - // return instr; - // } - // - // /*! - // * Load 8-bits from memory (at address of 64-bit reg + 32-bit signed offset) into gpr (zero - // * extended) - // */ - // static Instruction load16_gpr8z_r64off32s(uint8_t dst, uint8_t src, int32_t offset) { - // Instruction instr(0x0f); - // instr.set_op2(0xb6); - // instr.set_modrm_rex_sib_for_reg_reg_disp32(dst, 2, src, true); - // instr.set_disp(Imm(4, offset)); - // return instr; - // } - // - // /*! - // * Load 16-bits from memory (at address of 64-bit reg + 32-bit signed offset) into gpr (zero - // * extended) - // */ - // static Instruction load16_gpr16z_r64off32s(uint8_t dst, uint8_t src, int32_t offset) { - // Instruction instr(0x0f); - // instr.set_op2(0xb7); - // instr.set_modrm_rex_sib_for_reg_reg_disp32(dst, 2, src, true); - // instr.set_disp(Imm(4, offset)); - // return instr; - // } - // - // /*! - // * Load 16-bits from memory (at address of 64-bit reg + 32-bit signed offset) into gpr (sign - // * extended) - // */ - // static Instruction load16_gpr16s_r64off32s(uint8_t dst, uint8_t src, int32_t offset) { - // Instruction instr(0x0f); - // instr.set_op2(0xbf); - // instr.set_modrm_rex_sib_for_reg_reg_disp32(dst, 2, src, true); - // instr.set_disp(Imm(4, offset)); - // return instr; - // } - // - // /*! - // * Load 32-bits from memory (at address of 64-bit reg + 32-bit signed offset) into gpr. - // * Use the sext flag to enable sign extension. 
- // */ - // static Instruction load32_gpr32sz_r64off32s(uint8_t dst_reg, - // int32_t offset, - // uint8_t src_reg, - // bool sext = false) { - // Instruction instr(0x8b); - // if (sext) { - // instr.op = 0x63; - // } - // instr.set_modrm_rex_sib_for_reg_reg_disp32(dst_reg, 2, src_reg, sext); - // instr.set_disp(Imm(4, offset)); - // return instr; - // } - // - // /*! - // * Load 64-bits from memory located at 64-bit reg + 32-bit signed offset into gpr - // */ - // static Instruction load64_gpr64_r64off32s(uint8_t dst_reg, int32_t offset, uint8_t src_reg) { - // Instruction instr(0x8b); - // instr.set_modrm_rex_sib_for_reg_reg_disp32(dst_reg, 2, src_reg, true); - // instr.set_disp(Imm(4, offset)); - // return instr; - // } - // - // /*! - // * Load 32-bits form memory located at 64-bit reg + 32-bit signed offset into xmm (32-bits) - // * movss - // */ - // static Instruction load32_xmm32_r64off32s(uint8_t dst, uint8_t src, int32_t offset) { - // Instruction instr(0xf3); - // instr.set_op2(0x0f); - // instr.set_op3(0x10); - // instr.set_modrm_rex_sib_for_reg_reg_disp32(dst, 2, src, false); - // instr.set_disp(Imm(4, offset)); - // instr.swap_op0_rex(); - // return instr; - // } + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + return instr; + } + + // todo - GPR64 -> XMM64 (zext) + // todo - XMM -> GPR64 + // todo - XMM128 - XMM128 + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // GOAL Loads and Stores + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + /*! + * movsx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ + static Instruction load8s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; + } + + static Instruction store8_gpr64_gpr64_plus_gpr64(Register addr1, Register addr2, Register value) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; + } + + static Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction store8_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; + } + + static Instruction load8s_gpr64_gpr64_plus_gpr64_plus_s32(Register 
dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction store8_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x88); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + if (value.id() > RBX) { + instr.add_rex(); + } + return instr; + } + + /*! + * movzx dst, BYTE PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ + static Instruction load8u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; + } + + static Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction load8u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + /*! + * movsx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ + static Instruction load16s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; + } + + static Instruction store16_gpr64_gpr64_plus_gpr64(Register addr1, + Register addr2, + Register value) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + instr.swap_op0_rex(); // 0x66 is the operand-size prefix and must precede REX; presumably this reorders them - confirm + return instr; + } + + static Instruction store16_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + instr.swap_op0_rex(); // 0x66 is the operand-size prefix and must precede REX; presumably this reorders them - confirm + return instr; + } + + static Instruction store16_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + instr.swap_op0_rex(); // 0x66 is the operand-size prefix and must precede REX; presumably this reorders them - confirm 
+ return instr; + } + + static Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction load16s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + /*! + * movzx dst, WORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ + static Instruction load16u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true, + false); + return instr; + } + + static Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction load16u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + /*! + * movsxd dst, DWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. 
+ */ + static Instruction load32s_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; + } + + static Instruction store32_gpr64_gpr64_plus_gpr64(Register addr1, + Register addr2, + Register value) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id()); + return instr; + } + + static Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction store32_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; + } + + static Instruction load32s_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + 
assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x63); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction store32_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; + } + + /*! + * mov dst (32-bit), DWORD PTR [addr1 + addr2] - x86-64 has no movzxd; a 32-bit mov zero-extends into the 64-bit register + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ + static Instruction load32u_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id()); + return instr; + } + + static Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; + } + + static Instruction load32u_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + 
assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + return instr; + } + + /*! + * mov dst, QWORD PTR [addr1 + addr2] + * addr1 and addr2 have to be different registers. + * Cannot use rsp. + */ + static Instruction load64_gpr64_gpr64_plus_gpr64(Register dst, Register addr1, Register addr2) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), true); + return instr; + } + + static Instruction store64_gpr64_gpr64_plus_gpr64(Register addr1, + Register addr2, + Register value) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + Instruction instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + true); + return instr; + } + + static Instruction load64_gpr64_gpr64_plus_gpr64_plus_s8(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction store64_gpr64_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register value, + s64 offset) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset 
>= INT8_MIN && offset <= INT8_MAX); + Instruction instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction load64_gpr64_gpr64_plus_gpr64_plus_s32(Register dst, + Register addr1, + Register addr2, + s64 offset) { + assert(dst.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x8b); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(dst.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction store64_gpr64_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register value, + s64 offset) { + assert(value.is_gpr()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(addr1 != addr2); + assert(addr1 != RSP); + assert(addr2 != RSP); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x89); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(value.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, true); + return instr; + } + + static Instruction store_goal_gpr(Register addr, + Register value, + Register off, + int offset, + int size) { + switch (size) { + case 1: + if (offset == 0) { + return store8_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store8_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store8_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + assert(false); + } + case 2: + if (offset == 0) { + return store16_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store16_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return 
store16_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + assert(false); + } + case 4: + if (offset == 0) { + return store32_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + assert(false); + } + case 8: + if (offset == 0) { + return store64_gpr64_gpr64_plus_gpr64(addr, off, value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store64_gpr64_gpr64_plus_gpr64_plus_s8(addr, off, value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store64_gpr64_gpr64_plus_gpr64_plus_s32(addr, off, value, offset); + } else { + assert(false); + } + default: + assert(false); + } + } + + /*! + * Load memory at addr + offset, where addr is a GOAL pointer and off is the offset register. + * This will pick the appropriate fancy addressing mode instruction. 
+ */ + static Instruction load_goal_gpr(Register dst, + Register addr, + Register off, + int offset, + int size, + bool sign_extend) { + switch (size) { + case 1: + if (offset == 0) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load8u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load8u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load8s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load8u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + assert(false); + } + case 2: + if (offset == 0) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load16u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load16u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + if (sign_extend) { + return load16s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load16u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + assert(false); + } + case 4: + if (offset == 0) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64(dst, addr, off); + } else { + return load32u_gpr64_gpr64_plus_gpr64(dst, addr, off); + } + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } else { + return load32u_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + } + } else if (offset >= INT32_MIN && offset 
<= INT32_MAX) { + if (sign_extend) { + return load32s_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } else { + return load32u_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + } + } else { + assert(false); + } + case 8: + if (offset == 0) { + return load64_gpr64_gpr64_plus_gpr64(dst, addr, off); + + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load64_gpr64_gpr64_plus_gpr64_plus_s8(dst, addr, off, offset); + + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return load64_gpr64_gpr64_plus_gpr64_plus_s32(dst, addr, off, offset); + + } else { + assert(false); + } + default: + assert(false); + } + } + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // LOADS n' STORES - XMM32 + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + static Instruction store32_xmm32_gpr64_plus_gpr64(Register addr1, + Register addr2, + Register xmm_value) { + assert(xmm_value.is_xmm()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_value.hw_id(), addr1.hw_id(), addr2.hw_id()); + + instr.swap_op0_rex(); + return instr; + } + + static Instruction load32_xmm32_gpr64_plus_gpr64(Register xmm_dest, + Register addr1, + Register addr2) { + assert(xmm_dest.is_xmm()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_addr(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id()); + + instr.swap_op0_rex(); + return instr; + } + + static Instruction store32_xmm32_gpr64_plus_gpr64_plus_s8(Register addr1, + Register addr2, + Register xmm_value, + s64 offset) { + assert(xmm_value.is_xmm()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_value.hw_id(), 
addr1.hw_id(), + addr2.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; + } + + static Instruction load32_xmm32_gpr64_plus_gpr64_plus_s8(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset) { + assert(xmm_dest.is_xmm()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(offset >= INT8_MIN && offset <= INT8_MAX); + + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s8(xmm_dest.hw_id(), addr1.hw_id(), addr2.hw_id(), + offset, false); + + instr.swap_op0_rex(); + return instr; + } + + static Instruction store32_xmm32_gpr64_plus_gpr64_plus_s32(Register addr1, + Register addr2, + Register xmm_value, + s64 offset) { + assert(xmm_value.is_xmm()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_value.hw_id(), addr1.hw_id(), + addr2.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; + } + + static Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest, + Register addr1, + Register addr2, + s64 offset) { + assert(xmm_dest.is_xmm()); + assert(addr1.is_gpr()); + assert(addr2.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_reg_plus_reg_plus_s32(xmm_dest.hw_id(), addr1.hw_id(), + addr2.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; + } + + static Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset) { + if (offset == 0) { + return load32_xmm32_gpr64_plus_gpr64(xmm_dest, addr, off); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return load32_xmm32_gpr64_plus_gpr64_plus_s8(xmm_dest, addr, off, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return 
load32_xmm32_gpr64_plus_gpr64_plus_s32(xmm_dest, addr, off, offset); + } else { + assert(false); + } + } + + static Instruction store_goal_xmm32(Register addr, Register xmm_value, Register off, s64 offset) { + if (offset == 0) { + return store32_xmm32_gpr64_plus_gpr64(addr, off, xmm_value); + } else if (offset >= INT8_MIN && offset <= INT8_MAX) { + return store32_xmm32_gpr64_plus_gpr64_plus_s8(addr, off, xmm_value, offset); + } else if (offset >= INT32_MIN && offset <= INT32_MAX) { + return store32_xmm32_gpr64_plus_gpr64_plus_s32(addr, off, xmm_value, offset); + } else { + assert(false); + } + } + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // LOADS n' STORES - XMM128 + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + /*! + * Store a 128-bit xmm into an address stored in a register, no offset (opcode bytes 66 0F 7F are movdqa: the address must be 16-byte aligned) + */ + static Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) { + assert(gpr_addr.is_gpr()); + assert(xmm_value.is_xmm()); + Instruction instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x7f); + instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false); + instr.swap_op0_rex(); + return instr; + } + + static Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) { + assert(gpr_addr.is_gpr()); + assert(xmm_dest.is_xmm()); + Instruction instr(0x66); + instr.set_op2(0x0f); + instr.set_op3(0x6f); + instr.set_modrm_and_rex_for_reg_addr(xmm_dest.hw_id(), gpr_addr.hw_id(), false); + instr.swap_op0_rex(); + return instr; + } + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // RIP loads and stores + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + static Instruction load64_rip_s32(Register dest, s64 offset) { + assert(dest.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x8b); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; + } + + static Instruction load32s_rip_s32(Register dest, s64 offset) { + assert(dest.is_gpr()); + assert(offset >= INT32_MIN && offset <= 
INT32_MAX); + Instruction instr(0x63); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; + } + + static Instruction load32u_rip_s32(Register dest, s64 offset) { + assert(dest.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x8b); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, false); + return instr; + } + + static Instruction load16u_rip_s32(Register dest, s64 offset) { + assert(dest.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0xf); + instr.set_op2(0xb7); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; + } + + static Instruction load16s_rip_s32(Register dest, s64 offset) { + assert(dest.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0xf); + instr.set_op2(0xbf); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; + } + + static Instruction load8u_rip_s32(Register dest, s64 offset) { + assert(dest.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0xf); + instr.set_op2(0xb6); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; + } + + static Instruction load8s_rip_s32(Register dest, s64 offset) { + assert(dest.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0xf); + instr.set_op2(0xbe); + instr.set_modrm_and_rex_for_rip_plus_s32(dest.hw_id(), offset, true); + return instr; + } + + static Instruction static_load(Register dest, s64 offset, int size, bool sign_extend) { + switch (size) { + case 1: + if (sign_extend) { + return load8s_rip_s32(dest, offset); + } else { + return load8u_rip_s32(dest, offset); + } + break; + case 2: + if (sign_extend) { + return load16s_rip_s32(dest, offset); + } else { + return load16u_rip_s32(dest, offset); + } + break; + case 4: + if (sign_extend) { + return load32s_rip_s32(dest, offset); + 
} else { + return load32u_rip_s32(dest, offset); + } + break; + case 8: + return load64_rip_s32(dest, offset); + default: + assert(false); + } + } + + static Instruction store64_rip_s32(Register src, s64 offset) { + assert(src.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, true); + return instr; + } + + static Instruction store32_rip_s32(Register src, s64 offset) { + assert(src.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + return instr; + } + + static Instruction store16_rip_s32(Register src, s64 offset) { + assert(src.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x66); + instr.set_op2(0x89); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + instr.swap_op0_rex(); + return instr; + } + + static Instruction store8_rip_s32(Register src, s64 offset) { + assert(src.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction instr(0x88); + instr.set_modrm_and_rex_for_rip_plus_s32(src.hw_id(), offset, false); + if (src.id() > RBX) { + instr.add_rex(); + } + return instr; + } + + static Instruction static_store(Register value, s64 offset, int size) { + switch (size) { + case 1: + return store8_rip_s32(value, offset); + case 2: + return store16_rip_s32(value, offset); + case 4: + return store32_rip_s32(value, offset); + case 8: + return store64_rip_s32(value, offset); + default: + assert(false); + } + } + + static Instruction static_addr(Register dst, s64 offset) { + assert(dst.is_gpr()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + Instruction 
instr(0x8d); + instr.set_modrm_and_rex_for_rip_plus_s32(dst.hw_id(), offset, true); + return instr; + } + + static Instruction static_load_xmm32(Register xmm_dest, s64 offset) { + assert(xmm_dest.is_xmm()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x10); + instr.set_modrm_and_rex_for_rip_plus_s32(xmm_dest.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; + } + + static Instruction static_store_xmm32(Register xmm_value, s64 offset) { + assert(xmm_value.is_xmm()); + assert(offset >= INT32_MIN && offset <= INT32_MAX); + + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x11); + instr.set_modrm_and_rex_for_rip_plus_s32(xmm_value.hw_id(), offset, false); + + instr.swap_op0_rex(); + return instr; + } + + // TODO, special load/stores of 128 bit values. + + // TODO, consider specialized stack loads and stores? //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; // FUNCTION STUFF //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - /*! - * Return instruction + * Function return. Pops the 64-bit return address (real) off the stack and jumps to it. */ static Instruction ret() { return Instruction(0xc3); } /*! * Instruction to push gpr (64-bits) onto the stack */ - static Instruction push_gpr64(uint8_t reg) { - if (reg >= 8) { - auto i = Instruction(0x50 + reg - 8); + static Instruction push_gpr64(Register reg) { + assert(reg.is_gpr()); + if (reg.hw_id() >= 8) { + auto i = Instruction(0x50 + reg.hw_id() - 8); i.set(REX(false, false, false, true)); return i; } - return Instruction(0x50 + reg); + return Instruction(0x50 + reg.hw_id()); } /*! 
* Instruction to pop 64 bit gpr from the stack */ - static Instruction pop_gpr64(uint8_t reg) { - if (reg >= 8) { - auto i = Instruction(0x58 + reg - 8); + static Instruction pop_gpr64(Register reg) { + if (reg.hw_id() >= 8) { + auto i = Instruction(0x58 + reg.hw_id() - 8); i.set(REX(false, false, false, true)); return i; } - return Instruction(0x58 + reg); - } - - // /*! - // * Call a function stored in a 64-bit gpr - // */ - // static Instruction call_r64(uint8_t reg) { - // Instruction instr(0xff); - // if (reg >= 8) { - // instr.set(REX(false, false, false, true)); - // reg -= 8; - // } - // assert(reg < 8); - // ModRM mrm; - // mrm.rm = reg; - // mrm.reg_op = 2; - // mrm.mod = 3; - // instr.set(mrm); - // return instr; - // } - // - // /*! - // * Call a function stored in a 64-bit gpr - // */ - // static Instruction jmp_r64(uint8_t reg) { - // Instruction instr(0xff); - // if (reg >= 8) { - // instr.set(REX(false, false, false, true)); - // reg -= 8; - // } - // assert(reg < 8); - // ModRM mrm; - // mrm.rm = reg; - // mrm.reg_op = 4; - // mrm.mod = 3; - // instr.set(mrm); - // return instr; - // } - // - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // // INTEGER MATH - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // - // /*! - // * Add 64-bit registers. - // */ - // static Instruction add_gpr64_gpr64(uint8_t dst, uint8_t src) { - // Instruction instr(0x01); - // instr.set_modrm_and_rex(src, dst, 3, true); - // return instr; - // } - // - // /*! - // * Add a signed 32 bit immediate to a 64 bit register - // * TODO: determine if we can decrease to imm16? - // */ - // static Instruction add_gpr64_imm32s(uint8_t dst, int32_t offset) { - // Instruction instr(0x81); - // instr.set_modrm_and_rex(0, dst, 3, true); - // instr.set(Imm(4, offset)); - // return instr; - // } - // - // /*! - // * Add a signed 32 bit immediate to a 64 bit register - // * TODO: determine if we can decrease to imm16? 
- // */ - // static Instruction add_gpr64_imm8s(uint8_t dst, int8_t v) { - // Instruction instr(0x83); - // instr.set_modrm_and_rex(0, dst, 3, true); - // instr.set(Imm(1, v)); - // return instr; - // } - // - // /*! - // * Subtract 64-bit registers - // */ - // static Instruction sub_gpr64_gpr64(uint8_t dst, uint8_t src) { - // Instruction instr(0x29); - // instr.set_modrm_and_rex(src, dst, 3, true); - // return instr; - // } - // - // /*! - // * Multiply gprs (32-bit, signed). - // */ - // static Instruction imul_gpr32_gpr32(uint8_t dst, uint8_t src) { - // Instruction instr(0xf); - // instr.set_op2(0xaf); - // instr.set_modrm_and_rex(dst, src, 3, false); - // return instr; - // } - // - // /*! - // * Divide (idiv, 32 bit) - // */ - // static Instruction idiv_gpr32(uint8_t reg) { - // Instruction instr(0xf7); - // instr.set_modrm_and_rex(7, reg, 3, false); - // return instr; - // } - // - // /*! - // * Convert doubleword to quadword for division. - // * Blame Intel for this disaster. - // */ - // static Instruction cdq() { - // Instruction instr(0x99); - // return instr; - // } - // - // /*! - // * Move from gpr32 to gpr64, with sign extension. - // * Needed for division madness. - // */ - // static Instruction movsx_r64_r32(uint8_t dst, uint8_t src) { - // Instruction instr(0x63); - // instr.set_modrm_and_rex(dst, src, 3, true); - // return instr; - // } - // - // /*! - // * Compare gpr64. This sets the flags for the jumps. - // */ - // static Instruction cmp_gpr64_gpr64(uint8_t a, uint8_t b) { - // Instruction instr(0x3b); - // instr.set_modrm_and_rex(a, b, 3, true); - // return instr; - // } - // - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // // BIT STUFF - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // - // /*! - // * Or of two gprs - // */ - // static Instruction or_gpr64_gpr64(uint8_t dst, uint8_t src) { - // Instruction instr(0x0b); - // instr.set_modrm_and_rex(dst, src, 3, true); - // return instr; - // } - // - // /*! 
- // * And of two gprs - // */ - // static Instruction and_gpr64_gpr64(uint8_t dst, uint8_t src) { - // Instruction instr(0x23); - // instr.set_modrm_and_rex(dst, src, 3, true); - // return instr; - // } - // - // /*! - // * Xor of two gprs - // */ - // static Instruction xor_gpr64_gpr64(uint8_t dst, uint8_t src) { - // Instruction instr(0x33); - // instr.set_modrm_and_rex(dst, src, 3, true); - // return instr; - // } - // - // /*! - // * This is the way "real" compilers zero registers, so we should do it too. - // */ - // static Instruction xor_zero_gpr(uint8_t reg) { - // Instruction instr(0x31); - // instr.set_modrm_and_rex(reg, reg, 3, false); - // return instr; - // } - // - // /*! - // * Bitwise not a gpr - // */ - // static Instruction not_gpr64(uint8_t reg) { - // Instruction instr(0xf7); - // instr.set_modrm_and_rex(2, reg, 3, true); - // return instr; - // } - // - // /*! - // * Shift 64-bit gpr left by CL register - // */ - // static Instruction shl_gpr64_cl(uint8_t reg) { - // Instruction instr(0xd3); - // instr.set_modrm_and_rex(4, reg, 3, true); - // return instr; - // } - // - // /*! - // * Shift 64-bit gpr right (logical) by CL register - // */ - // static Instruction shr_gpr64_cl(uint8_t reg) { - // Instruction instr(0xd3); - // instr.set_modrm_and_rex(5, reg, 3, true); - // return instr; - // } - // - // /*! - // * Shift 64-bit gpr right (arithmetic) by CL register - // */ - // static Instruction sar_gpr64_cl(uint8_t reg) { - // Instruction instr(0xd3); - // instr.set_modrm_and_rex(7, reg, 3, true); - // return instr; - // } - // - // /*! - // * Shift 64-ptr left (logical) by the constant shift amount "sa". - // */ - // static Instruction shl_gpr64_u8(uint8_t reg, uint8_t sa) { - // Instruction instr(0xc1); - // instr.set_modrm_and_rex(4, reg, 3, true); - // instr.set(Imm(1, sa)); - // return instr; - // } - // - // /*! - // * Shift 64-ptr right (logical) by the constant shift amount "sa". 
- // */ - // static Instruction shr_gpr64_u8(uint8_t reg, uint8_t sa) { - // Instruction instr(0xc1); - // instr.set_modrm_and_rex(5, reg, 3, true); - // instr.set(Imm(1, sa)); - // return instr; - // } - // - // /*! - // * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". - // */ - // static Instruction sar_gpr64_u8(uint8_t reg, uint8_t sa) { - // Instruction instr(0xc1); - // instr.set_modrm_and_rex(7, reg, 3, true); - // instr.set(Imm(1, sa)); - // return instr; - // } - // - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // // CONTROL FLOW - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // - // /*! - // * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. - // */ - // static Instruction jmp_32() { - // Instruction instr(0xe9); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump if equal. - // * TODO - can we get away with 16 bits? - // */ - // static Instruction je_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x84); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump not equal. - // * TODO - can we get away with 16 bits? - // */ - // static Instruction jne_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x85); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump less than or equal. - // * TODO - can we get away with 16 bits? - // */ - // static Instruction jle_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x8e); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump greater than or equal. - // * TODO - can we get away with 16 bits? - // */ - // static Instruction jge_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x8d); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump less than - // * TODO - can we get away with 16 bits? 
- // */ - // static Instruction jl_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x8c); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump greater than - // * TODO - can we get away with 16 bits? - // */ - // static Instruction jg_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x8f); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump below or equal - // * TODO - can we get away with 16 bits? - // */ - // static Instruction jbe_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x86); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump above or equal - // * TODO - can we get away with 16 bits? - // */ - // static Instruction jae_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x83); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump below - // * TODO - can we get away with 16 bits? - // */ - // static Instruction jb_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x82); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // /*! - // * Jump above - // * TODO - can we get away with 16 bits? - // */ - // static Instruction ja_32() { - // Instruction instr(0x0f); - // instr.set_op2(0x87); - // instr.set(Imm(4, 0)); - // return instr; - // } - // - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // // FLOAT MATH - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // - // /*! - // * Compare two floats and set flag register for jump - // */ - // static Instruction cmp_flt_flt(uint8_t a, uint8_t b) { - // Instruction instr(0x0f); - // instr.set_op2(0x2e); - // instr.set_modrm_and_rex(a, b, 3, false); - // return instr; - // } - // - // /*! 
- // * Multiply two floats in xmm's - // */ - // static Instruction mulss_xmm_xmm(uint8_t dst, uint8_t src) { - // Instruction instr(0xf3); - // instr.set_op2(0x0f); - // instr.set_op3(0x59); - // instr.set_modrm_and_rex(dst, src, 3, false); - // instr.swap_op0_rex(); - // return instr; - // } - // - // /*! - // * Divide two floats in xmm's - // */ - // static Instruction divss_xmm_xmm(uint8_t dst, uint8_t src) { - // Instruction instr(0xf3); - // instr.set_op2(0x0f); - // instr.set_op3(0x5e); - // instr.set_modrm_and_rex(dst, src, 3, false); - // instr.swap_op0_rex(); - // return instr; - // } - // - // /*! - // * Subtract two floats in xmm's - // */ - // static Instruction subss_xmm_xmm(uint8_t dst, uint8_t src) { - // Instruction instr(0xf3); - // instr.set_op2(0x0f); - // instr.set_op3(0x5c); - // instr.set_modrm_and_rex(dst, src, 3, false); - // instr.swap_op0_rex(); - // return instr; - // } - // - // /*! - // * Add two floats in xmm's - // */ - // static Instruction addss_xmm_xmm(uint8_t dst, uint8_t src) { - // Instruction instr(0xf3); - // instr.set_op2(0x0f); - // instr.set_op3(0x58); - // instr.set_modrm_and_rex(dst, src, 3, false); - // instr.swap_op0_rex(); - // return instr; - // } - // - // /*! - // * Convert GPR int32 to XMM float (single precision) - // */ - // static Instruction int32_to_float(uint8_t dst, uint8_t src) { - // Instruction instr(0xf3); - // instr.set_op2(0x0f); - // instr.set_op3(0x2a); - // instr.set_modrm_and_rex(dst, src, 3, false); - // instr.swap_op0_rex(); - // return instr; - // } - // - // /*! 
- // * Convert XMM float to GPR int32(single precision) (truncate) - // */ - // static Instruction float_to_int64(uint8_t dst, uint8_t src) { - // Instruction instr(0xf3); - // instr.set_op2(0x0f); - // instr.set_op3(0x2c); - // instr.set_modrm_and_rex(dst, src, 3, true); - // instr.swap_op0_rex(); - // return instr; - // } - // - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // // UTILITIES - // //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - // - // /*! - // * A "null" instruction. This instruction does not generate any bytes - // * but can be referred to by a label. Useful to insert in place of a real instruction - // * if the real instruction has been optimized out. - // */ - // static Instruction null() { - // Instruction i(0); - // i.is_null = true; - // return i; - // } - // - // /*! - // * A "function start" instruction. This emits no opcodes, but is used - // * to determine where to insert the function type tag and how to align a function. - // */ - // static Instruction function_start() { - // Instruction i(0); - // i.is_null = true; - // i.is_function_start = true; - // return i; - // } + return Instruction(0x58 + reg.hw_id()); + } + + /*! + * Call a function stored in a 64-bit gpr + */ + static Instruction call_r64(uint8_t reg) { + Instruction instr(0xff); + if (reg >= 8) { + instr.set(REX(false, false, false, true)); + reg -= 8; + } + assert(reg < 8); + ModRM mrm; + mrm.rm = reg; + mrm.reg_op = 2; + mrm.mod = 3; + instr.set(mrm); + return instr; + } + + /*! 
+ * Jump to the address stored in a 64-bit gpr + */ + static Instruction jmp_r64(uint8_t reg) { + Instruction instr(0xff); + if (reg >= 8) { + instr.set(REX(false, false, false, true)); + reg -= 8; + } + assert(reg < 8); + ModRM mrm; + mrm.rm = reg; + mrm.reg_op = 4; + mrm.mod = 3; + instr.set(mrm); + return instr; + } + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // INTEGER MATH + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + static Instruction sub_gpr64_imm8s(Register reg, int64_t imm) { + assert(reg.is_gpr()); + assert(imm >= INT8_MIN && imm <= INT8_MAX); + // SUB r/m64, imm8 : REX.W + 83 /5 ib + Instruction instr(0x83); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(1, imm)); + return instr; + } + + static Instruction sub_gpr64_imm32s(Register reg, int64_t imm) { + assert(reg.is_gpr()); + assert(imm >= INT32_MIN && imm <= INT32_MAX); + Instruction instr(0x81); + instr.set_modrm_and_rex(5, reg.hw_id(), 3, true); + instr.set(Imm(4, imm)); + return instr; + } + + static Instruction add_gpr64_imm8s(Register reg, int64_t v) { + assert(v >= INT8_MIN && v <= INT8_MAX); + Instruction instr(0x83); + instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); + instr.set(Imm(1, v)); + return instr; + } + + static Instruction add_gpr64_imm32s(Register reg, int64_t v) { + assert(v >= INT32_MIN && v <= INT32_MAX); + Instruction instr(0x81); + instr.set_modrm_and_rex(0, reg.hw_id(), 3, true); + instr.set(Imm(4, v)); + return instr; + } + + static Instruction add_gpr64_imm(Register reg, int64_t imm) { + if (imm >= INT8_MIN && imm <= INT8_MAX) { + return add_gpr64_imm8s(reg, imm); + } else if (imm >= INT32_MIN && imm <= INT32_MAX) { + return add_gpr64_imm32s(reg, imm); + } else { + assert(false); + } + } + + static Instruction sub_gpr64_imm(Register reg, int64_t imm) { + if (imm >= INT8_MIN && imm <= INT8_MAX) { + return sub_gpr64_imm8s(reg, imm); + } else if (imm >= INT32_MIN && imm <= INT32_MAX) { + return sub_gpr64_imm32s(reg, imm); + } else { + assert(false); + } + } + + 
static Instruction add_gpr64_gpr64(Register dst, Register src) { + Instruction instr(0x01); + assert(dst.is_gpr()); + assert(src.is_gpr()); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; + } + + static Instruction sub_gpr64_gpr64(Register dst, Register src) { + Instruction instr(0x29); + assert(dst.is_gpr()); + assert(src.is_gpr()); + instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true); + return instr; + } + + /*! + * Multiply gprs (32-bit, signed). + * (Note - probably worth doing imul on gpr64's to implement the EE's unsigned multiply) + */ + static Instruction imul_gpr32_gpr32(Register dst, Register src) { + Instruction instr(0xf); + instr.set_op2(0xaf); + assert(dst.is_gpr()); + assert(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + return instr; + } + + /*! + * Divide (idiv, 32 bit) + * todo UNTESTED + */ + static Instruction idiv_gpr32(Register reg) { + Instruction instr(0xf7); + assert(reg.is_gpr()); + instr.set_modrm_and_rex(7, reg.hw_id(), 3, false); + return instr; + } + + /*! + * Convert doubleword to quadword for division. + * todo UNTESTED + */ + static Instruction cdq() { + Instruction instr(0x99); + return instr; + } + + /*! + * Move from gpr32 to gpr64, with sign extension. + * Needed for multiplication/division madness. + */ + static Instruction movsx_r64_r32(Register dst, Register src) { + Instruction instr(0x63); + assert(dst.is_gpr()); + assert(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; + } + + /*! + * Compare gpr64. This sets the flags for the jumps. + * todo UNTESTED + */ + static Instruction cmp_gpr64_gpr64(Register a, Register b) { + Instruction instr(0x3b); + assert(a.is_gpr()); + assert(b.is_gpr()); + instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, true); + return instr; + } + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // BIT STUFF + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + /*! 
+ * Or of two gprs + */ + static Instruction or_gpr64_gpr64(Register dst, Register src) { + Instruction instr(0x0b); + assert(dst.is_gpr()); + assert(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; + } + + /*! + * And of two gprs + */ + static Instruction and_gpr64_gpr64(Register dst, Register src) { + Instruction instr(0x23); + assert(dst.is_gpr()); + assert(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; + } + + /*! + * Xor of two gprs + */ + static Instruction xor_gpr64_gpr64(Register dst, Register src) { + Instruction instr(0x33); + assert(dst.is_gpr()); + assert(src.is_gpr()); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true); + return instr; + } + + /*! + * Bitwise not a gpr + */ + static Instruction not_gpr64(Register reg) { + Instruction instr(0xf7); + assert(reg.is_gpr()); + instr.set_modrm_and_rex(2, reg.hw_id(), 3, true); + return instr; + } + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // SHIFTS + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + /*! + * Shift 64-bit gpr left by CL register + */ + static Instruction shl_gpr64_cl(uint8_t reg) { + Instruction instr(0xd3); + instr.set_modrm_and_rex(4, reg, 3, true); + return instr; + } + + /*! + * Shift 64-bit gpr right (logical) by CL register + */ + static Instruction shr_gpr64_cl(uint8_t reg) { + Instruction instr(0xd3); + instr.set_modrm_and_rex(5, reg, 3, true); + return instr; + } + + /*! + * Shift 64-bit gpr right (arithmetic) by CL register + */ + static Instruction sar_gpr64_cl(uint8_t reg) { + Instruction instr(0xd3); + instr.set_modrm_and_rex(7, reg, 3, true); + return instr; + } + + /*! + * Shift 64-ptr left (logical) by the constant shift amount "sa". + */ + static Instruction shl_gpr64_u8(uint8_t reg, uint8_t sa) { + Instruction instr(0xc1); + instr.set_modrm_and_rex(4, reg, 3, true); + instr.set(Imm(1, sa)); + return instr; + } + + /*! 
+ * Shift 64-ptr right (logical) by the constant shift amount "sa". + */ + static Instruction shr_gpr64_u8(uint8_t reg, uint8_t sa) { + Instruction instr(0xc1); + instr.set_modrm_and_rex(5, reg, 3, true); + instr.set(Imm(1, sa)); + return instr; + } + + /*! + * Shift 64-ptr right (arithmetic) by the constant shift amount "sa". + */ + static Instruction sar_gpr64_u8(uint8_t reg, uint8_t sa) { + Instruction instr(0xc1); + instr.set_modrm_and_rex(7, reg, 3, true); + instr.set(Imm(1, sa)); + return instr; + } + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // CONTROL FLOW + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + /*! + * Jump, 32-bit constant offset. The offset is by default 0 and must be patched later. + */ + static Instruction jmp_32() { + Instruction instr(0xe9); + instr.set(Imm(4, 0)); + return instr; + } + + /*! + * Jump if equal. + */ + static Instruction je_32() { + Instruction instr(0x0f); + instr.set_op2(0x84); + instr.set(Imm(4, 0)); + return instr; + } + + /*! + * Jump not equal. + */ + static Instruction jne_32() { + Instruction instr(0x0f); + instr.set_op2(0x85); + instr.set(Imm(4, 0)); + return instr; + } + + /*! + * Jump less than or equal. + */ + static Instruction jle_32() { + Instruction instr(0x0f); + instr.set_op2(0x8e); + instr.set(Imm(4, 0)); + return instr; + } + + /*! + * Jump greater than or equal. + */ + static Instruction jge_32() { + Instruction instr(0x0f); + instr.set_op2(0x8d); + instr.set(Imm(4, 0)); + return instr; + } + + /*! + * Jump less than + */ + static Instruction jl_32() { + Instruction instr(0x0f); + instr.set_op2(0x8c); + instr.set(Imm(4, 0)); + return instr; + } + + /*! + * Jump greater than + */ + static Instruction jg_32() { + Instruction instr(0x0f); + instr.set_op2(0x8f); + instr.set(Imm(4, 0)); + return instr; + } + + /*! + * Jump below or equal + */ + static Instruction jbe_32() { + Instruction instr(0x0f); + instr.set_op2(0x86); + instr.set(Imm(4, 0)); + return instr; + } + + /*! 
+ * Jump above or equal + */ + static Instruction jae_32() { + Instruction instr(0x0f); + instr.set_op2(0x83); + instr.set(Imm(4, 0)); + return instr; + } + + /*! + * Jump below + */ + static Instruction jb_32() { + Instruction instr(0x0f); + instr.set_op2(0x82); + instr.set(Imm(4, 0)); + return instr; + } + + /*! + * Jump above + */ + static Instruction ja_32() { + Instruction instr(0x0f); + instr.set_op2(0x87); + instr.set(Imm(4, 0)); + return instr; + } + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // FLOAT MATH + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + /*! + * Compare two floats and set flag register for jump (ucomiss) + */ + static Instruction cmp_flt_flt(Register a, Register b) { + assert(a.is_xmm()); + assert(b.is_xmm()); + Instruction instr(0x0f); + instr.set_op2(0x2e); + instr.set_modrm_and_rex(a.hw_id(), b.hw_id(), 3, false); + return instr; + } + + /*! + * Multiply two floats in xmm's + */ + static Instruction mulss_xmm_xmm(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x59); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; + } + + /*! + * Divide two floats in xmm's + */ + static Instruction divss_xmm_xmm(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5e); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; + } + + /*! + * Subtract two floats in xmm's + */ + static Instruction subss_xmm_xmm(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x5c); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; + } + + /*! 
+ * Add two floats in xmm's + */ + static Instruction addss_xmm_xmm(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x58); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; + } + + /*! + * Convert GPR int32 to XMM float (single precision) + */ + static Instruction int32_to_float(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_gpr()); + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x2a); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; + } + + /*! + * Convert XMM float to GPR int32(single precision) (truncate) + */ + static Instruction float_to_int32(Register dst, Register src) { + assert(dst.is_gpr()); + assert(src.is_xmm()); + Instruction instr(0xf3); + instr.set_op2(0x0f); + instr.set_op3(0x2c); + instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, false); + instr.swap_op0_rex(); + return instr; + } + + // eventually... + // sqrt + // rsqrt + // abs + + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + // UTILITIES + //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + /*! + * A "null" instruction. This instruction does not generate any bytes + * but can be referred to by a label. Useful to insert in place of a real instruction + * if the real instruction has been optimized out. + */ + static Instruction null() { + Instruction i(0); + i.is_null = true; + return i; + } }; -} // namespace goal +} // namespace emitter -#endif // JAK1_IGEN_H +#endif // JAK_IGEN_H diff --git a/goalc/emitter/Instruction.h b/goalc/emitter/Instruction.h index cdf74dfe6b..aab76e1a42 100644 --- a/goalc/emitter/Instruction.h +++ b/goalc/emitter/Instruction.h @@ -1,15 +1,10 @@ -/*! 
- * @file Instruction.h - * x86-64 instruction encoding - */ -#ifndef JAK1_INSTRUCTION_H -#define JAK1_INSTRUCTION_H +#ifndef JAK_INSTRUCTION_H +#define JAK_INSTRUCTION_H #include #include "common/common_types.h" -namespace goal { - +namespace emitter { /*! * The ModRM byte */ @@ -174,11 +169,195 @@ struct Instruction { } } + void set_modrm_and_rex_for_reg_plus_reg_plus_s8(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s8 offset, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 1; // 1-byte displacement (disp8) + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(1, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + assert(sib.index != 4); + + if (rex_b || rex_w || rex_r || rex_x) { + set(REX(rex_w, rex_r, rex_x, rex_b)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_modrm_and_rex_for_reg_plus_reg_plus_s32(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + s32 offset, + bool rex_w) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 2; // 4-byte displacement (disp32) + modrm.rm = 4; // sib! 
+ modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + Imm imm2(4, offset); + + // default addr1 in index + if (addr1 == 4) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + assert(sib.index != 4); + + if (rex_b || rex_w || rex_r || rex_x) { + set(REX(rex_w, rex_r, rex_x, rex_b)); + } + + set(modrm); + set(sib); + set_disp(imm2); + } + + void set_modrm_and_rex_for_reg_plus_reg_addr(uint8_t reg, + uint8_t addr1, + uint8_t addr2, + bool rex_w = false, + bool rex_always = false) { + bool rex_b = false, rex_r = false, rex_x = false; + bool addr1_ext = false; + bool addr2_ext = false; + + if (addr1 >= 8) { + addr1 -= 8; + addr1_ext = true; + } + + if (addr2 >= 8) { + addr2 -= 8; + addr2_ext = true; + } + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; // no disp + modrm.rm = 4; // sib! + modrm.reg_op = reg; + + SIB sib; + sib.scale = 0; + + if (addr1 == 5 && addr2 == 5) { + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + modrm.mod = 1; + set_disp(Imm(1, 0)); + + } else { + // default addr1 in index + bool flipped = (addr1 == 4) || (addr2 == 5); + + if (flipped) { + sib.index = addr2; + sib.base = addr1; + rex_x = addr2_ext; + rex_b = addr1_ext; + } else { + // addr1 in index + sib.index = addr1; + sib.base = addr2; + rex_x = addr1_ext; + rex_b = addr2_ext; + } + assert(sib.base != 5); + assert(sib.index != 4); + } + + if (rex_b || rex_w || rex_r || rex_x || rex_always) { + set(REX(rex_w, rex_r, rex_x, rex_b)); + } + + set(modrm); + set(sib); + } + /*! * Set modrm and rex as needed for two regs for an addressing mode. * Will set SIB if R12 or RSP indexing is used. 
*/ - void set_modrm_and_rex_for_addr(uint8_t reg, uint8_t rm, uint8_t mod, bool rex_w = false) { + void set_modrm_and_rex_for_reg_addr(uint8_t reg, uint8_t rm, bool rex_w = false) { bool rex_b = false, rex_r = false; if (rm >= 8) { @@ -192,12 +371,10 @@ struct Instruction { } ModRM modrm; - modrm.mod = mod; + modrm.mod = 0; modrm.reg_op = reg; modrm.rm = rm; - set(modrm); - if (rm == 4) { SIB sib; sib.scale = 0; @@ -207,11 +384,38 @@ struct Instruction { set(sib); } + if (rm == 5) { + modrm.mod = 1; // 1 byte imm + set_disp(Imm(1, 0)); + } + + set(modrm); if (rex_b || rex_w || rex_r) { set(REX(rex_w, rex_r, false, rex_b)); } } + void set_modrm_and_rex_for_rip_plus_s32(uint8_t reg, s32 offset, bool rex_w = false) { + bool rex_r = false; + + if (reg >= 8) { + reg -= 8; + rex_r = true; + } + + ModRM modrm; + modrm.mod = 0; + modrm.reg_op = reg; + modrm.rm = 5; // use the RIP addressing mode + set(modrm); + + if (rex_r || rex_w) { + set(REX(rex_w, rex_r, false, false)); + } + + set_disp(Imm(4, offset)); + } + void add_rex() { if (!set_rex) { set(REX()); @@ -342,7 +546,47 @@ struct Instruction { } return count; } + + uint8_t length() const { + if (is_null) + return 0; + uint8_t count = 0; + if (set_rex) { + count++; + } + + count++; + + if (op2_set) { + count++; + } + + if (op3_set) { + count++; + } + + if (set_modrm) { + count++; + } + + if (set_sib) { + count++; + } + + if (set_disp_imm) { + for (int i = 0; i < disp.size; i++) { + count++; + } + } + + if (set_imm) { + for (int i = 0; i < imm.size; i++) { + count++; + } + } + return count; + } }; -} // namespace goal +} // namespace emitter -#endif // JAK1_INSTRUCTION_H +#endif // JAK_INSTRUCTION_H diff --git a/goalc/emitter/Register.cpp b/goalc/emitter/Register.cpp new file mode 100644 index 0000000000..30cac4b18f --- /dev/null +++ b/goalc/emitter/Register.cpp @@ -0,0 +1,33 @@ +#include "Register.h" + +namespace emitter { +RegisterInfo RegisterInfo::make_register_info() { + RegisterInfo info; + + info.m_info[RAX] = 
{-1, false, false, "rax"}; + info.m_info[RCX] = {3, false, false, "rcx"}; + info.m_info[RDX] = {2, false, false, "rdx"}; + info.m_info[RBX] = {-1, true, false, "rbx"}; + info.m_info[RSP] = {-1, false, true, "rsp"}; + info.m_info[RBP] = {-1, true, false, "rbp"}; + info.m_info[RSI] = {1, false, false, "rsi"}; + info.m_info[RDI] = {0, false, false, "rdi"}; + + info.m_info[R8] = {4, false, false, "r8"}; + info.m_info[R9] = {5, false, false, "r9"}; + info.m_info[R10] = {6, true, false, "r10"}; + info.m_info[R11] = {7, true, false, "r11"}; + info.m_info[R12] = {-1, true, false, "r12"}; + info.m_info[R13] = {-1, false, true, "r13"}; // pp? + info.m_info[R14] = {-1, false, true, "r14"}; // st? + info.m_info[R15] = {-1, false, true, "r15"}; // offset. + + info.m_arg_regs = std::array({RDI, RSI, RDX, RCX, R8, R9, R10, R11}); + info.m_saved_gprs = std::array({RBX, RBP, R10, R11, R12}); + info.m_saved_xmms = + std::array({XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15}); + + return info; +} + +} // namespace emitter \ No newline at end of file diff --git a/goalc/emitter/Register.h b/goalc/emitter/Register.h new file mode 100644 index 0000000000..1e46393bb3 --- /dev/null +++ b/goalc/emitter/Register.h @@ -0,0 +1,134 @@ +/*! + * @file Register.h + * Representation of an x86-64 Register. + */ + +#ifndef JAK_REGISTER_H +#define JAK_REGISTER_H + +#include +#include +#include +#include "common/common_types.h" + +namespace emitter { + +// registers by name +enum X86_REG : u8 { + RAX, // return, temp + RCX, // arg 3, temp + RDX, // arg 2, temp + RBX, // saved + + RSP, // stack pointer (special) + RBP, // saved + RSI, // arg 1, temp + RDI, // arg 0, temp + + R8, // arg 4, temp + R9, // arg 5, temp + R10, // arg 6, saved (arg in GOAL only) + R11, // arg 7, saved (arg in GOAL only) + R12, // saved + R13, // pp (special!) + R14, // st (special!) + R15, // offset (special!) 
+ XMM0, + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7, + XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15 +}; + +class Register { + public: + Register() = default; + + // intentionally not explicit so we can use X86_REGs in place of Registers + Register(int id) : m_id(id) {} + + bool is_xmm() const { return m_id >= XMM0 && m_id <= XMM15; } + + bool is_gpr() const { return m_id >= RAX && m_id <= R15; } + + int hw_id() const { + if (is_xmm()) { + return m_id - XMM0; + } else if (is_gpr()) { + return m_id - RAX; + } else { + assert(false); + } + return 0xff; + } + + int id() const { return m_id; } + + struct hash { + auto operator()(const Register& x) const { return std::hash()(x.m_id); } + }; + + bool operator==(const Register& x) const { return m_id == x.m_id; } + + bool operator!=(const Register& x) const { return m_id != x.m_id; } + + private: + u8 m_id = 0xff; +}; + +class RegisterInfo { + public: + static constexpr int N_ARGS = 8; + static constexpr int N_REGS = 32; + static constexpr int N_SAVED_GPRS = 5; + static constexpr int N_SAVED_XMMS = 8; + + static_assert(N_REGS - 1 == XMM15, "bad register count"); + + static RegisterInfo make_register_info(); + + struct Info { + int argument_id = -1; // -1 if not argument + bool saved = false; // does the callee save it? + bool special = false; // is it a special GOAL register? 
+ std::string name; + }; + + const Info& get_info(Register r) const { return m_info.at(r.id()); } + + Register get_arg_reg(int id) const { return m_arg_regs.at(id); } + + Register get_saved_gpr(int id) const { return m_saved_gprs.at(id); } + + Register get_saved_xmm(int id) const { return m_saved_xmms.at(id); } + + Register get_process_reg() const { return R13; } + + Register get_st_reg() const { return R14; } + + Register get_offset_reg() const { return R15; } + + Register get_ret_reg() const { return RAX; } + + private: + RegisterInfo() = default; + std::array m_info; + std::array m_arg_regs; + std::array m_saved_gprs; + std::array m_saved_xmms; +}; + +} // namespace emitter + +#endif // JAK_REGISTER_H diff --git a/goalc/emitter/registers.cpp b/goalc/emitter/registers.cpp deleted file mode 100644 index 480386bf57..0000000000 --- a/goalc/emitter/registers.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include "registers.h" - -namespace goal { -bool is_gpr(u8 reg) { - return reg <= R15; -} - -u8 get_nth_xmm(u8 id) { - return id + XMM0; -} - -bool is_xmm(u8 reg) { - return reg >= XMM0 && reg <= XMM15; -} - -u8 xmm_to_id(u8 reg) { - return reg - 16; -} - -} // namespace goal \ No newline at end of file diff --git a/goalc/emitter/registers.h b/goalc/emitter/registers.h deleted file mode 100644 index 4d6892c189..0000000000 --- a/goalc/emitter/registers.h +++ /dev/null @@ -1,111 +0,0 @@ -/*! - * @file registers.h - * Definitions and conventions for x86-64 registers. 
- */ - -#ifndef JAK1_REGISTERS_H -#define JAK1_REGISTERS_H - -#include "common/common_types.h" - -namespace goal { -enum X86R : u8 { - RAX, // return, temp - RCX, // arg 3 - RDX, // arg 2 - RBX, // X saved - - RSP, // stack pointer - RBP, // X base pointer (like fp) - RSI, // arg 1 - RDI, // arg 0 - - R8, // arg 4 - R9, // arg 5, saved - R10, // arg 6, saved (arg in GOAL only) - R11, // arg 7, saved (arg in GOAL only) - R12, // X saved - pp register (like s6) - R13, // X saved - function call register (like t9) - R14, // X saved - offset (added in GOAL x86) - R15, // X saved - st (like s7) - XMM0, - XMM1, - XMM2, - XMM3, - XMM4, - XMM5, - XMM6, - XMM7, - XMM8, - XMM9, - XMM10, - XMM11, - XMM12, - XMM13, - XMM14, - XMM15 -}; - -// the argument registers of GOAL. -// We must have 8 to be compatible with GOAL's 8-argument function calls. -constexpr int ARG_REG_COUNT = 8; - -// the first 6 are shared with Linux, and the last two are unique to GOAL. -constexpr X86R ARG_REGS[ARG_REG_COUNT] = { - X86R::RDI, X86R::RSI, X86R::RDX, X86R::RCX, X86R::R8, X86R::R9, X86R::R10, X86R::R11, -}; - -// The saved registers of GOAL. Note that RSP, RBP, R12, R13, R14, R15 shouldn't be changed by the -// caller, but these are special registers and won't be allocated to hold variables. 
-constexpr int SAVED_REG_COUNT = 4; -constexpr X86R SAVED_REGS[SAVED_REG_COUNT] = {X86R::RBX, X86R::R9, X86R::R10, X86R::R11}; - -// special registers -constexpr X86R PP_REG = X86R::R12; -constexpr X86R FUNC_REG = X86R::R13; -constexpr X86R OFF_REG = X86R::R14; -constexpr X86R ST_REG = X86R::R15; -constexpr X86R FP_REG = X86R::RBP; -constexpr X86R RET_REG = X86R::RAX; - -// size in bytes of a pointer -constexpr int PTR_SIZE = 4; - -// size in bytes of a general purpose register -constexpr int GPR_SIZE = 8; - -constexpr const char* x86_gpr_names[] = { - "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", - "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"}; - -/* - Name Arg ID Clobber? Special - RAX - y return - RCX 3 y arg - RDX 2 y arg - RBX - n - - RSP - n stack pointer - RBP - n base pointer - RSI 1 y arg - RDI 0 y arg - - R8 4 y arg - R9 5 n arg - R10 6 n arg - R11 7 n arg - R12 - n pp - R13 - n func - R14 - n - R15 - */ - -bool is_gpr(u8 reg); -u8 get_nth_xmm(u8 id); -bool is_xmm(u8 reg); -u8 xmm_to_id(u8 reg); - -} // namespace goal - -#endif // JAK1_REGISTERS_H diff --git a/goalc/goos/Interpreter.cpp b/goalc/goos/Interpreter.cpp index 26bbc94852..3f514a19ff 100644 --- a/goalc/goos/Interpreter.cpp +++ b/goalc/goos/Interpreter.cpp @@ -85,6 +85,13 @@ Interpreter::Interpreter() { load_goos_library(); } +Interpreter::~Interpreter() { + // There are some circular references that prevent shared_ptrs from cleaning up if we + // don't do this. + global_environment.as_env()->vars.clear(); + goal_env.as_env()->vars.clear(); +} + /*! * Disable printfs on errors, to make test output look less messy. 
*/ diff --git a/goalc/goos/Interpreter.h b/goalc/goos/Interpreter.h index 7d37dbe210..b19903e68e 100644 --- a/goalc/goos/Interpreter.h +++ b/goalc/goos/Interpreter.h @@ -15,6 +15,7 @@ namespace goos { class Interpreter { public: Interpreter(); + ~Interpreter(); void execute_repl(); void throw_eval_error(const Object& o, const std::string& err); Object eval_with_rewind(const Object& obj, const std::shared_ptr& env); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3cff0d2440..05be29603e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,8 +5,13 @@ add_executable(goalc-test test_goos.cpp test_listener_deci2.cpp test_kernel.cpp - test_CodeTester.cpp all_jak1_symbols.cpp - test_type_system.cpp) + test_type_system.cpp + test_CodeTester.cpp + test_emitter_slow.cpp + test_emitter_loads_and_store.cpp + test_emitter_xmm32.cpp + test_emitter_integer_math.cpp + ) target_link_libraries(goalc-test goos util listener runtime emitter type_system gtest) \ No newline at end of file diff --git a/test/test_CodeTester.cpp b/test/test_CodeTester.cpp index ede0aa79f4..04a56ab8a4 100644 --- a/test/test_CodeTester.cpp +++ b/test/test_CodeTester.cpp @@ -2,13 +2,16 @@ * @file test_CodeTester.cpp * Tests for the CodeTester, a tool for testing the emitter by emitting code and running it * from within the test application. + * + * These tests should just make sure the basic functionality of CodeTester works, and that it + * can generate prologues/epilogues, and execute them without crashing. 
*/ #include "gtest/gtest.h" #include "goalc/emitter/CodeTester.h" #include "goalc/emitter/IGen.h" -using namespace goal; +using namespace emitter; TEST(CodeTester, prologue) { CodeTester tester; @@ -47,121 +50,120 @@ TEST(CodeTester, execute_push_pop_gprs) { tester.execute(); } -TEST(CodeTester, load_constant_64_and_move_gpr_gpr_64) { - std::vector u64_constants = {0, UINT64_MAX, INT64_MAX, 7, 12}; +TEST(CodeTester, xmm_store_128) { + CodeTester tester; + tester.init_code_buffer(256); + // movdqa [rbx], xmm3 + // movdqa [r14], xmm3 + // movdqa [rbx], xmm14 + // movdqa [r14], xmm13 + tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM3)); + tester.emit(IGen::store128_gpr64_xmm128(R14, XMM3)); + tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM14)); + tester.emit(IGen::store128_gpr64_xmm128(R14, XMM13)); + EXPECT_EQ(tester.dump_to_hex_string(), + "66 0f 7f 1b 66 41 0f 7f 1e 66 44 0f 7f 33 66 45 0f 7f 2e"); + + tester.clear(); + tester.emit(IGen::store128_gpr64_xmm128(RSP, XMM1)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 0c 24"); // requires SIB byte. + + tester.clear(); + tester.emit(IGen::store128_gpr64_xmm128(R12, XMM13)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 2c 24"); // requires SIB byte and REX byte + + tester.clear(); + tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM1)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 4d 00"); + + tester.clear(); + tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM11)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 7f 5d 00"); + + tester.clear(); + tester.emit(IGen::store128_gpr64_xmm128(R13, XMM2)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 7f 55 00"); - // test we can load a 64-bit constant into all gprs, move it to any other gpr, and return it. 
- // rsp is skipping because that's the stack pointer and would prevent us from popping gprs after + tester.clear(); + tester.emit(IGen::store128_gpr64_xmm128(R13, XMM12)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 65 00"); +} +TEST(CodeTester, sub_gpr64_imm8) { CodeTester tester; tester.init_code_buffer(256); - - for (auto constant : u64_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - - for (int r2 = 0; r2 < 16; r2++) { - if (r2 == RSP) { - continue; - } - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u64(r1, constant)); - tester.emit(IGen::mov_gpr64_gpr64(r2, r1)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - EXPECT_EQ(tester.execute(), constant); - } - } + for (int i = 0; i < 16; i++) { + tester.emit(IGen::sub_gpr64_imm8s(i, -1)); } + EXPECT_EQ(tester.dump_to_hex_string(true), + "4883E8FF4883E9FF4883EAFF4883EBFF4883ECFF4883EDFF4883EEFF4883EFFF4983E8FF4983E9FF4983EA" + "FF4983EBFF4983ECFF4983EDFF4983EEFF4983EFFF"); } -TEST(CodeTester, load_constant_32_unsigned) { - std::vector u64_constants = {0, UINT32_MAX, INT32_MAX, 7, 12}; - - // test loading 32-bit constants, with all upper 32-bits zero. - // this uses a different opcode than 64-bit loads. 
+TEST(CodeTester, add_gpr64_imm8) { CodeTester tester; tester.init_code_buffer(256); - - for (auto constant : u64_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_u32(r1, constant)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - EXPECT_EQ(tester.execute(), constant); - } + for (int i = 0; i < 16; i++) { + tester.emit(IGen::add_gpr64_imm8s(i, -1)); } + EXPECT_EQ(tester.dump_to_hex_string(true), + "4883C0FF4883C1FF4883C2FF4883C3FF4883C4FF4883C5FF4883C6FF4883C7FF4983C0FF4983C1FF4983C2" + "FF4983C3FF4983C4FF4983C5FF4983C6FF4983C7FF"); } -TEST(CodeTester, load_constant_32_signed) { - std::vector s32_constants = {0, 1, INT32_MAX, INT32_MIN, 12, -1}; - - // test loading signed 32-bit constants. for values < 0 this will sign extend. +TEST(CodeTester, xmm_load_128) { CodeTester tester; tester.init_code_buffer(256); + tester.emit(IGen::load128_xmm128_gpr64(XMM3, RBX)); + tester.emit(IGen::load128_xmm128_gpr64(XMM3, R14)); + tester.emit(IGen::load128_xmm128_gpr64(XMM14, RBX)); + tester.emit(IGen::load128_xmm128_gpr64(XMM13, R14)); + EXPECT_EQ(tester.dump_to_hex_string(), + "66 0f 6f 1b 66 41 0f 6f 1e 66 44 0f 6f 33 66 45 0f 6f 2e"); - for (auto constant : s32_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - - tester.clear(); - tester.emit_push_all_gprs(true); - tester.emit(IGen::mov_gpr64_s32(r1, constant)); - tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); - tester.emit_pop_all_gprs(true); - tester.emit_return(); - EXPECT_EQ(tester.execute(), constant); - } - } + tester.clear(); + tester.emit(IGen::load128_xmm128_gpr64(XMM1, RSP)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 0c 24"); // requires SIB byte. 
+ + tester.clear(); + tester.emit(IGen::load128_xmm128_gpr64(XMM13, R12)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 2c 24"); // requires SIB byte and REX byte + + tester.clear(); + tester.emit(IGen::load128_xmm128_gpr64(XMM1, RBP)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 4d 00"); + + tester.clear(); + tester.emit(IGen::load128_xmm128_gpr64(XMM11, RBP)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 6f 5d 00"); + + tester.clear(); + tester.emit(IGen::load128_xmm128_gpr64(XMM2, R13)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 6f 55 00"); + + tester.clear(); + tester.emit(IGen::load128_xmm128_gpr64(XMM12, R13)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 65 00"); } -TEST(CodeTester, xmm_move) { - std::vector u32_constants = {0, INT32_MAX, UINT32_MAX, 17}; +TEST(CodeTester, push_pop_xmms) { + CodeTester tester; + tester.init_code_buffer(512); + tester.emit_push_all_xmms(); + tester.emit_pop_all_xmms(); + tester.emit_return(); + tester.execute(); +} - // test moving between xmms (32-bit) and gprs. +TEST(CodeTester, push_pop_all_the_things) { CodeTester tester; - tester.init_code_buffer(256); + tester.init_code_buffer(512); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(); - for (auto constant : u32_constants) { - for (int r1 = 0; r1 < 16; r1++) { - if (r1 == RSP) { - continue; - } - for (int r2 = 0; r2 < 16; r2++) { - if (r2 == RSP) { - continue; - } - for (int r3 = 0; r3 < 16; r3++) { - for (int r4 = 0; r4 < 16; r4++) { - tester.clear(); - tester.emit_push_all_gprs(true); - // move constant to gpr - tester.emit(IGen::mov_gpr64_u32(r1, constant)); - // move gpr to xmm - tester.emit(IGen::movd_xmm32_gpr32(get_nth_xmm(r3), r1)); - // move xmm to xmm - tester.emit(IGen::mov_xmm32_xmm32(get_nth_xmm(r4), get_nth_xmm(r3))); - // move xmm to gpr - tester.emit(IGen::movd_gpr32_xmm32(r2, get_nth_xmm(r4))); - // return! 
- tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); - tester.emit_return(); - } - } - } - } - } -} \ No newline at end of file + // ... + tester.emit_pop_all_gprs(); + tester.emit_pop_all_xmms(); + tester.emit_return(); + tester.execute(); +} diff --git a/test/test_emitter_integer_math.cpp b/test/test_emitter_integer_math.cpp new file mode 100644 index 0000000000..1eb8dd926a --- /dev/null +++ b/test/test_emitter_integer_math.cpp @@ -0,0 +1,628 @@ +#include "third-party/fmt/core.h" +#include "gtest/gtest.h" +#include "goalc/emitter/CodeTester.h" +#include "goalc/emitter/IGen.h" + +using namespace emitter; + +TEST(EmitterIntegerMath, add_gpr64_imm8s) { + CodeTester tester; + tester.init_code_buffer(256); + + std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; + std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; + + // test the ones that aren't rsp + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (auto val : vals) { + for (auto imm : imms) { + auto expected = val + imm; + + tester.clear(); + tester.emit_push_all_gprs(true); + + // move initial value to register + tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); + // do the add + tester.emit(IGen::add_gpr64_imm8s(i, imm)); + // move for return + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + auto result = tester.execute_ret(val, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + + tester.clear(); + tester.emit(IGen::add_gpr64_imm8s(RSP, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 83 c4 0c"); +} + +TEST(EmitterIntegerMath, add_gpr64_imm32s) { + CodeTester tester; + tester.init_code_buffer(256); + + std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; + std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; + + // test the ones that aren't rsp + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (auto val : vals) { + for (auto 
imm : imms) { + auto expected = val + imm; + + tester.clear(); + tester.emit_push_all_gprs(true); + + // move initial value to register + tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); + // do the add + tester.emit(IGen::add_gpr64_imm32s(i, imm)); + // move for return + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + auto result = tester.execute_ret(val, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + + tester.clear(); + tester.emit(IGen::add_gpr64_imm32s(RSP, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 81 c4 0c 00 00 00"); +} + +TEST(EmitterIntegerMath, sub_gpr64_imm8s) { + CodeTester tester; + tester.init_code_buffer(256); + + std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; + std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX}; + + // test the ones that aren't rsp + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (auto val : vals) { + for (auto imm : imms) { + auto expected = val - imm; + + tester.clear(); + tester.emit_push_all_gprs(true); + + // move initial value to register + tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); + // do the add + tester.emit(IGen::sub_gpr64_imm8s(i, imm)); + // move for return + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + auto result = tester.execute_ret(val, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + + tester.clear(); + tester.emit(IGen::sub_gpr64_imm8s(RSP, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 83 ec 0c"); +} + +TEST(EmitterIntegerMath, sub_gpr64_imm32s) { + CodeTester tester; + tester.init_code_buffer(256); + + std::vector vals = {0, 1, -1, INT32_MIN, INT32_MAX, INT64_MIN, INT64_MAX}; + std::vector imms = {0, 1, -1, INT8_MIN, INT8_MAX, INT32_MIN, INT32_MAX}; + + // test the ones that aren't rsp + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (auto val : 
vals) { + for (auto imm : imms) { + auto expected = val - imm; + + tester.clear(); + tester.emit_push_all_gprs(true); + + // move initial value to register + tester.emit(IGen::mov_gpr64_gpr64(i, tester.get_c_abi_arg_reg(0))); + // do the add + tester.emit(IGen::sub_gpr64_imm32s(i, imm)); + // move for return + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + auto result = tester.execute_ret(val, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + + tester.clear(); + tester.emit(IGen::sub_gpr64_imm32s(RSP, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 81 ec 0c 00 00 00"); +} + +TEST(EmitterIntegerMath, add_gpr64_gpr64) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (auto v1 : vals) { + for (auto v2 : vals) { + auto expected = v1 + v2; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v1)); + tester.emit(IGen::mov_gpr64_u64(j, v2)); + tester.emit(IGen::add_gpr64_gpr64(i, j)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(EmitterIntegerMath, sub_gpr64_gpr64) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (auto v1 : vals) { + for (auto v2 : vals) { + auto expected = v1 - v2; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v1)); + 
tester.emit(IGen::mov_gpr64_u64(j, v2)); + tester.emit(IGen::sub_gpr64_gpr64(i, j)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(EmitterIntegerMath, mul_gpr32_gpr32) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = { + 0, 1, -2, -20, 123123, INT32_MIN, INT32_MAX, INT32_MIN + 1, INT32_MAX - 1}; + + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (auto v1 : vals) { + for (auto v2 : vals) { + // this is kind of weird behavior, but it's what the PS2 CPU does, I think. + // the lower 32-bits of the result are sign extended, even if this sign doesn't match + // the sign of the real product. This is true for both signed and unsigned multiply. + auto expected = ((s64(v1) * s64(v2)) << 32) >> 32; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, (s64)v1)); + tester.emit(IGen::mov_gpr64_u64(j, (s64)v2)); + tester.emit(IGen::imul_gpr32_gpr32(i, j)); + tester.emit(IGen::movsx_r64_r32(RAX, i)); // weird PS2 sign extend. 
+ tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + if (result != expected) { + fmt::print("fail {} x {}: {}\n", v1, v2, tester.dump_to_hex_string()); + } + } + } + } + } +} + +TEST(EmitterIntegerMath, or_gpr64_gpr64) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (auto v1 : vals) { + for (auto v2 : vals) { + auto expected = v1 | v2; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v1)); + tester.emit(IGen::mov_gpr64_u64(j, v2)); + tester.emit(IGen::or_gpr64_gpr64(i, j)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(EmitterIntegerMath, and_gpr64_gpr64) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (auto v1 : vals) { + for (auto v2 : vals) { + auto expected = v1 & v2; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v1)); + tester.emit(IGen::mov_gpr64_u64(j, v2)); + tester.emit(IGen::and_gpr64_gpr64(i, j)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(EmitterIntegerMath, xor_gpr64_gpr64) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector 
vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (auto v1 : vals) { + for (auto v2 : vals) { + auto expected = v1 ^ v2; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v1)); + tester.emit(IGen::mov_gpr64_u64(j, v2)); + tester.emit(IGen::xor_gpr64_gpr64(i, j)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(EmitterIntegerMath, not_gpr64) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (auto v1 : vals) { + auto expected = ~v1; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v1)); + tester.emit(IGen::not_gpr64(i)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } +} + +TEST(EmitterIntegerMath, shl_gpr64_cl) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + std::vector sas = {0, 1, 23, 53, 64}; + + for (int i = 0; i < 16; i++) { + if (i == RSP || i == RCX) { + continue; + } + for (auto v : vals) { + for (auto sa : sas) { + auto expected = v << sa; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v)); + tester.emit(IGen::mov_gpr64_u64(RCX, sa)); + tester.emit(IGen::shl_gpr64_cl(i)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + 
tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } +} + +TEST(EmitterIntegerMath, shr_gpr64_cl) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), + INT64_MAX, 117, 32, u64(-348473), 83747382}; + std::vector sas = {0, 1, 23, 53, 64}; + + for (int i = 0; i < 16; i++) { + if (i == RSP || i == RCX) { + continue; + } + for (auto v : vals) { + for (auto sa : sas) { + auto expected = v >> sa; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v)); + tester.emit(IGen::mov_gpr64_u64(RCX, sa)); + tester.emit(IGen::shr_gpr64_cl(i)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } +} + +TEST(EmitterIntegerMath, sar_gpr64_cl) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + std::vector sas = {0, 1, 23, 53, 64}; + + for (int i = 0; i < 16; i++) { + if (i == RSP || i == RCX) { + continue; + } + for (auto v : vals) { + for (auto sa : sas) { + auto expected = v >> sa; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v)); + tester.emit(IGen::mov_gpr64_u64(RCX, sa)); + tester.emit(IGen::sar_gpr64_cl(i)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } +} + +TEST(EmitterIntegerMath, shl_gpr64_u8) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + std::vector sas = {0, 1, 23, 53, 64}; + + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + 
} + for (auto v : vals) { + for (auto sa : sas) { + auto expected = v << sa; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v)); + tester.emit(IGen::shl_gpr64_u8(i, sa)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } +} + +TEST(EmitterIntegerMath, shr_gpr64_u8) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, u64(-2), u64(INT32_MIN), INT32_MAX, u64(INT64_MIN), + INT64_MAX, 117, 32, u64(-348473), 83747382}; + std::vector sas = {0, 1, 23, 53, 64}; + + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (auto v : vals) { + for (auto sa : sas) { + auto expected = v >> sa; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v)); + tester.emit(IGen::shr_gpr64_u8(i, sa)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } +} + +TEST(EmitterIntegerMath, sar_gpr64_u8) { + CodeTester tester; + tester.init_code_buffer(256); + std::vector vals = {0, 1, -2, INT32_MIN, INT32_MAX, INT64_MIN, + INT64_MAX, 117, 32, -348473, 83747382}; + std::vector sas = {0, 1, 23, 53, 64}; + + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (auto v : vals) { + for (auto sa : sas) { + auto expected = v >> sa; + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, v)); + tester.emit(IGen::sar_gpr64_u8(i, sa)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } +} + +TEST(EmitterIntegerMath, jumps) { + CodeTester tester; + tester.init_code_buffer(256); + + std::vector reads; + + auto 
x = IGen::jmp_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::je_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::jne_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::jle_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::jge_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::jl_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::jg_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::jbe_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::jae_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::jb_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + x = IGen::ja_32(); + reads.push_back(tester.size() + x.offset_of_imm()); + tester.emit(x); + + for (auto off : reads) { + EXPECT_EQ(0, tester.read(off)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "E9000000000F84000000000F85000000000F8E000000000F8D000000000F8C000000000F8F000000000F86" + "000000000F83000000000F82000000000F8700000000"); +} + +TEST(EmitterIntegerMath, null) { + auto instr = IGen::null(); + EXPECT_EQ(0, instr.emit(nullptr)); +} \ No newline at end of file diff --git a/test/test_emitter_loads_and_store.cpp b/test/test_emitter_loads_and_store.cpp new file mode 100644 index 0000000000..02a9261800 --- /dev/null +++ b/test/test_emitter_loads_and_store.cpp @@ -0,0 +1,2513 @@ +/*! 
+ * @file test_emitter_loads_and_stores.cpp + * Tests for the emitter which are fast (checking 100's of functions) + */ + +#include "gtest/gtest.h" +#include "goalc/emitter/CodeTester.h" +#include "goalc/emitter/IGen.h" +#include "third-party/fmt/core.h" +// +using namespace emitter; + +TEST(EmitterLoadsAndStores, load_constant_64_and_move_gpr_gpr_64) { + std::vector u64_constants = {0, UINT64_MAX, INT64_MAX, 7, 12}; + + // test we can load a 64-bit constant into all gprs, move it to any other gpr, and return it. + // rsp is skipping because that's the stack pointer and would prevent us from popping gprs after + + CodeTester tester; + tester.init_code_buffer(256); + + for (auto constant : u64_constants) { + for (int r1 = 0; r1 < 16; r1++) { + if (r1 == RSP) { + continue; + } + + for (int r2 = 0; r2 < 16; r2++) { + if (r2 == RSP) { + continue; + } + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(r1, constant)); + tester.emit(IGen::mov_gpr64_gpr64(r2, r1)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + EXPECT_EQ(tester.execute(), constant); + } + } + } +} + +TEST(EmitterLoadsAndStores, load_constant_32_unsigned) { + std::vector u64_constants = {0, UINT32_MAX, INT32_MAX, 7, 12}; + + // test loading 32-bit constants, with all upper 32-bits zero. + // this uses a different opcode than 64-bit loads. 
+ CodeTester tester; + tester.init_code_buffer(256); + + for (auto constant : u64_constants) { + for (int r1 = 0; r1 < 16; r1++) { + if (r1 == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(r1, UINT64_MAX)); + tester.emit(IGen::mov_gpr64_u32(r1, constant)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + EXPECT_EQ(tester.execute(), constant); + } + } +} + +TEST(EmitterLoadsAndStores, load_constant_32_signed) { + std::vector s32_constants = {0, 1, INT32_MAX, INT32_MIN, 12, -1}; + + // test loading signed 32-bit constants. for values < 0 this will sign extend. + CodeTester tester; + tester.init_code_buffer(256); + + for (auto constant : s32_constants) { + for (int r1 = 0; r1 < 16; r1++) { + if (r1 == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_s32(r1, constant)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, r1)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + EXPECT_EQ(tester.execute(), constant); + } + } +} + +TEST(EmitterLoadsAndStores, load8s_gpr64_goal_ptr_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 04 1e"); + + tester.clear(); + tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f be 24 1e"); + + tester.clear(); + tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f be 24 3e"); + + tester.clear(); + tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); + EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f be 24 3e"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k 
= 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64(k, i, j)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 44 1e fd"); + + auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + 
tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load8s_gpr64_gpr64_gpr64_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 84 1e fd ff ff ff"); + + auto instr = IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); 
// j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load8s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +// Unsigned 8-bit load, base + index form with no displacement. +// First compares the emitted bytes for a few register choices against known encodings, +// then executes the load for every i/j/k register combination (RSP skipped, i != j) +// and checks that the loaded byte comes back zero-extended. +TEST(EmitterLoadsAndStores, load8u_gpr64_goal_ptr_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 04 1e"); + + tester.clear(); + tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b6 24 1e"); + + tester.clear(); + tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b6 24 3e"); + + tester.clear(); + tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); + EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b6 24 3e"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have
offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk (alternates all-zeros / all-ones so stale upper bits would show up) + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k (the unsigned variant: this is the instruction under test) + tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64(k, i, j)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + + // run! + // the result must be the raw byte value, not a sign-extended negative number + EXPECT_EQ(s64(tester.execute((u64)memory, 3, 0, 0)), 0xfe); + EXPECT_EQ(s64(tester.execute((u64)memory, 2, 0, 0)), 0xfd); + EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), 0xff); + EXPECT_EQ(s64(tester.execute((u64)memory, 5, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 44 1e fd"); + + auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); + EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); + EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); + EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load8u_gpr64_gpr64_gpr64_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 84 1e fd ff ff ff"); + + auto instr = IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load8u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u8 memory[8] = {0, 0, 0xfd, 0xfe, 0xff, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 3 + 3, 0, 0)), 0xfe); + EXPECT_EQ(s64(tester.execute((u64)memory, 2 + 3, 0, 0)), 0xfd); + EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xff); + EXPECT_EQ(s64(tester.execute((u64)memory, 5 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load16s_gpr64_goal_ptr_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 04 1e"); + + tester.clear(); + tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f bf 24 1e"); + + tester.clear(); + tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f bf 24 3e"); + + tester.clear(); + tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(R12, R15, R14)); + EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f bf 24 3e"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) 
{ + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64(k, i, j)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 44 1e fd"); + + auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load16s_gpr64_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 84 1e fd ff ff ff"); + + auto instr = IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load16s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load16u_gpr64_goal_ptr_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 04 1e"); + + tester.clear(); + tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "4c 0f b7 24 1e"); + + tester.clear(); + tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "4e 0f b7 24 3e"); + + tester.clear(); + tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(R12, R15, R14)); + EXPECT_EQ(tester.dump_to_hex_string(), "4f 0f b7 24 3e"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != 
j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64(k, i, j)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s16 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 6, 0, 0)), 0xfffe); + EXPECT_EQ(s64(tester.execute((u64)memory, 4, 0, 0)), 0xfffd); + EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xffff); + EXPECT_EQ(s64(tester.execute((u64)memory, 10, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 44 1e fd"); + + auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); + EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); + EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); + EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load16u_gpr64_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 84 1e fd ff ff ff"); + + auto instr = IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 
0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load16u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u16 memory[8] = {0, 0, 0xfffd, 0xfffe, 0xffff, 0, 0, 0}; + + // run! + EXPECT_EQ(s64(tester.execute((u64)memory, 6 + 3, 0, 0)), 0xfffe); + EXPECT_EQ(s64(tester.execute((u64)memory, 4 + 3, 0, 0)), 0xfffd); + EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xffff); + EXPECT_EQ(s64(tester.execute((u64)memory, 10 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load32s_gpr64_goal_ptr_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 63 04 1e"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64(k, i, j)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 63 44 1e fd"); + + auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load32s_gpr64_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 63 84 1e fd ff ff ff"); + + auto instr = IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load32s_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load32u_gpr64_goal_ptr_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "8b 04 1e"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64(k, i, j)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(s64(tester.execute((u64)memory, 12, 0, 0)), 0xfffffffe); + EXPECT_EQ(s64(tester.execute((u64)memory, 8, 0, 0)), 0xfffffffd); + EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), 0xffffffff); + EXPECT_EQ(s64(tester.execute((u64)memory, 20, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "8b 44 1e fd"); + + auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s32 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); + EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); + EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); + EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load32u_gpr64_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "8b 84 1e fd ff ff ff"); + + auto instr = IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load32u_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + u32 memory[8] = {0, 0, 0xfffffffd, 0xfffffffe, 0xffffffff, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(s64(tester.execute((u64)memory, 12 + 3, 0, 0)), 0xfffffffe); + EXPECT_EQ(s64(tester.execute((u64)memory, 8 + 3, 0, 0)), 0xfffffffd); + EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), 0xffffffff); + EXPECT_EQ(s64(tester.execute((u64)memory, 20 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load64_gpr64_goal_ptr_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(RAX, RBX, RSI)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 04 1e"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64(k, i, j)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(s64(tester.execute((u64)memory, 24, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 16, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 32, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 40, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 44 1e fd"); + + auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s8(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load64_gpr64_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3)); + + EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 84 1e fd ff ff ff"); + + auto instr = IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // fill k with junk + if (k != i && k != j) { + tester.emit(IGen::mov_gpr64_u64(k, (iter & 1) ? 0 : UINT64_MAX)); + } + + // load into k + tester.emit(IGen::load64_gpr64_gpr64_plus_gpr64_plus_s32(k, i, j, -3)); + + // move k to return register + tester.emit(IGen::mov_gpr64_gpr64(RAX, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s64 memory[8] = {0, 0, -3, -2, -1, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(s64(tester.execute((u64)memory, 24 + 3, 0, 0)), -2); + EXPECT_EQ(s64(tester.execute((u64)memory, 16 + 3, 0, 0)), -3); + EXPECT_EQ(s64(tester.execute((u64)memory, 32 + 3, 0, 0)), -1); + EXPECT_EQ(s64(tester.execute((u64)memory, 40 + 3, 0, 0)), 0); + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(RAX, RCX, RDX)); + EXPECT_EQ(tester.dump_to_hex_string(), "88 14 01"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store! + tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64(i, j, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 3, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], 7); + EXPECT_EQ(memory[4], 1); + + if (memory[3] != 7) { + fmt::print("test {}, {}, {}\n", tester.reg_name(i), tester.reg_name(j), + tester.reg_name(k)); + printf("%s\n", tester.dump_to_hex_string().c_str()); + } + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "88 54 01 0c"); + + auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store + tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], 7); + EXPECT_EQ(memory[4], 1); + + if (memory[3] != 7) { + fmt::print("test {}, {}, {}\n", tester.reg_name(i), tester.reg_name(j), + tester.reg_name(k)); + printf("%s\n", tester.dump_to_hex_string().c_str()); + } + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store8_gpr64_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "88 94 01 0c 00 00 00"); + + auto instr = IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store + tester.emit(IGen::store8_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s8 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], 7); + EXPECT_EQ(memory[4], 1); + + if (memory[3] != 7) { + fmt::print("test {}, {}, {}\n", tester.reg_name(i), tester.reg_name(j), + tester.reg_name(k)); + printf("%s\n", tester.dump_to_hex_string().c_str()); + } + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 04 08"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store! + tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64(i, j, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 6, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], s16(0xff07)); + EXPECT_EQ(memory[4], 1); + + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 44 01 0c"); + + auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store + tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], s16(0xff07)); + EXPECT_EQ(memory[4], 1); + + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store16_gpr64_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 44 89 84 01 0c 00 00 00"); + + auto instr = IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); // 32-bit displacement: compare all four bytes + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store + tester.emit(IGen::store16_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s16 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 6 + 3, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], s16(0xff07)); + EXPECT_EQ(memory[4], 1); + + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); + EXPECT_EQ(tester.dump_to_hex_string(), "44 89 04 08"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store! + tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64(i, j, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 12, 0xffffffff12341234, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], 0x12341234); + EXPECT_EQ(memory[4], 1); + + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "44 89 44 01 0c"); + + auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store + tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], s32(0xffffff07)); + EXPECT_EQ(memory[4], 1); + + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store32_gpr64_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "44 89 84 01 0c 00 00 00"); + + auto instr = IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); // 32-bit displacement: compare all four bytes + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store + tester.emit(IGen::store32_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s32 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 12 + 3, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], s32(0xffffff07)); + EXPECT_EQ(memory[4], 1); + + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(RCX, RAX, R8)); + EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 04 08"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store! + tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64(i, j, k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 24, 0xffffffff12341234, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], 0xffffffff12341234); + EXPECT_EQ(memory[4], 1); + + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, R8, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 44 01 0c"); + + auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(RAX, RCX, RDX, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s8*)(buff + instr.offset_of_disp()), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store + tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s8(i, j, k, -3)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); + EXPECT_EQ(memory[4], 1); + + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, store64_gpr64_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + + tester.clear(); + tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, R8, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "4c 89 84 01 0c 00 00 00"); + + auto instr = IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(RAX, RCX, RDX, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -3); // 32-bit displacement: compare all four bytes + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + if (k == RSP || k == j || k == i) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + tester.emit(IGen::pop_gpr64(k)); // k will have the value to store. + + // store + tester.emit(IGen::store64_gpr64_gpr64_plus_gpr64_plus_s32(i, j, k, -3)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + // prepare the memory: + s64 memory[8] = {0, 0, 3, -2, 1, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 24 + 3, 0xffffffffffffff07, 0); + EXPECT_EQ(memory[2], 3); + EXPECT_EQ(memory[3], s64(0xffffffffffffff07)); + EXPECT_EQ(memory[4], 1); + + iter++; + } + } + } +} + +TEST(EmitterLoadsAndStores, load64_rip) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::load64_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 8b 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::load64_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "488B050C000000488B0D0C000000488B150C000000488B1D0C000000488B250C000000488B2D0C00000048" + "8B350C000000488B3D0C0000004C8B050C0000004C8B0D0C0000004C8B150C0000004C8B1D0C0000004C8B" + "250C0000004C8B2D0C0000004C8B350C0000004C8B3D0C000000"); +} + +TEST(EmitterLoadsAndStores, load32s_rip) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::load32s_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 63 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::load32s_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "4863050C00000048630D0C0000004863150C00000048631D0C0000004863250C00000048632D0C00000048" + "63350C00000048633D0C0000004C63050C0000004C630D0C0000004C63150C0000004C631D0C0000004C63" + "250C0000004C632D0C0000004C63350C0000004C633D0C000000"); +} + +TEST(EmitterLoadsAndStores, load32u_rip) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::load32u_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "8b 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::load32u_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "8B050C0000008B0D0C0000008B150C0000008B1D0C0000008B250C0000008B2D0C0000008B350C0000008B" + "3D0C000000448B050C000000448B0D0C000000448B150C000000448B1D0C000000448B250C000000448B2D" + "0C000000448B350C000000448B3D0C000000"); +} + 
+TEST(EmitterLoadsAndStores, load16u_rip) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::load16u_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b7 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::load16u_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "480FB7050C000000480FB70D0C000000480FB7150C000000480FB71D0C000000480FB7250C000000480FB7" + "2D0C000000480FB7350C000000480FB73D0C0000004C0FB7050C0000004C0FB70D0C0000004C0FB7150C00" + "00004C0FB71D0C0000004C0FB7250C0000004C0FB72D0C0000004C0FB7350C0000004C0FB73D0C000000"); +} + +TEST(EmitterLoadsAndStores, load16s_rip) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::load16s_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f bf 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::load16s_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "480FBF050C000000480FBF0D0C000000480FBF150C000000480FBF1D0C000000480FBF250C000000480FBF" + "2D0C000000480FBF350C000000480FBF3D0C0000004C0FBF050C0000004C0FBF0D0C0000004C0FBF150C00" + "00004C0FBF1D0C0000004C0FBF250C0000004C0FBF2D0C0000004C0FBF350C0000004C0FBF3D0C000000"); +} + +TEST(EmitterLoadsAndStores, load8s_rip) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::load8s_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f be 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::load8s_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "480FBE050C000000480FBE0D0C000000480FBE150C000000480FBE1D0C000000480FBE250C000000480FBE" + "2D0C000000480FBE350C000000480FBE3D0C0000004C0FBE050C0000004C0FBE0D0C0000004C0FBE150C00" + "00004C0FBE1D0C0000004C0FBE250C0000004C0FBE2D0C0000004C0FBE350C0000004C0FBE3D0C000000"); +} + +TEST(EmitterLoadsAndStores, load8u_rip) { + CodeTester tester; + 
tester.init_code_buffer(256); + tester.emit(IGen::load8u_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 0f b6 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::load8u_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "480FB6050C000000480FB60D0C000000480FB6150C000000480FB61D0C000000480FB6250C000000480FB6" + "2D0C000000480FB6350C000000480FB63D0C0000004C0FB6050C0000004C0FB60D0C0000004C0FB6150C00" + "00004C0FB61D0C0000004C0FB6250C0000004C0FB62D0C0000004C0FB6350C0000004C0FB63D0C000000"); +} + +TEST(EmitterLoadsAndStores, store64_rip_s32) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::store64_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "48 89 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::store64_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "4889050C00000048890D0C0000004889150C00000048891D0C0000004889250C00000048892D0C00000048" + "89350C00000048893D0C0000004C89050C0000004C890D0C0000004C89150C0000004C891D0C0000004C89" + "250C0000004C892D0C0000004C89350C0000004C893D0C000000"); +} + +TEST(EmitterLoadsAndStores, store32_rip_s32) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::store32_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "89 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::store32_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "89050C000000890D0C00000089150C000000891D0C00000089250C000000892D0C00000089350C00000089" + "3D0C0000004489050C00000044890D0C0000004489150C00000044891D0C0000004489250C00000044892D" + "0C0000004489350C00000044893D0C000000"); +} + +TEST(EmitterLoadsAndStores, store16_rip_s32) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::store16_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "66 89 05 0c 00 00 00"); + + 
tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::store16_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "6689050C00000066890D0C0000006689150C00000066891D0C0000006689250C00000066892D0C00000066" + "89350C00000066893D0C000000664489050C0000006644890D0C000000664489150C0000006644891D0C00" + "0000664489250C0000006644892D0C000000664489350C0000006644893D0C000000"); +} + +TEST(EmitterLoadsAndStores, store8_rip_s32) { + CodeTester tester; + tester.init_code_buffer(256); + tester.emit(IGen::store8_rip_s32(RAX, 12)); + EXPECT_EQ(tester.dump_to_hex_string(), "88 05 0c 00 00 00"); + + tester.clear(); + for (int i = 0; i < 16; i++) { + tester.emit(IGen::store8_rip_s32(i, 12)); + } + + EXPECT_EQ(tester.dump_to_hex_string(true), + "88050C000000880D0C00000088150C000000881D0C0000004088250C00000040882D0C0000004088350C00" + "000040883D0C0000004488050C00000044880D0C0000004488150C00000044881D0C0000004488250C0000" + "0044882D0C0000004488350C00000044883D0C000000"); +} + +TEST(EmitterLoadsAndStores, static_addr) { + CodeTester tester; + tester.init_code_buffer(512); + + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + tester.clear(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(i, 12345)); // load test reg with junk + int start_of_lea = tester.size(); + auto lea_instr = IGen::static_addr(i, INT32_MAX); + tester.emit(lea_instr); + // patch instruction to lea the start of this code + 1. + tester.write(-start_of_lea - lea_instr.length() + 1, + start_of_lea + lea_instr.offset_of_disp()); + tester.emit(IGen::mov_gpr64_gpr64(RAX, i)); + tester.emit_pop_all_gprs(true); + tester.emit_return(); + + auto result = tester.execute(); + EXPECT_EQ(result, (u64)(tester.data()) + 1); + } +} \ No newline at end of file diff --git a/test/test_emitter_slow.cpp b/test/test_emitter_slow.cpp new file mode 100644 index 0000000000..ebe298745d --- /dev/null +++ b/test/test_emitter_slow.cpp @@ -0,0 +1,53 @@ +/*! 
+ * @file test_emitter_slow.cpp + * Tests for the emitter which take over 1 second. (Checking 10,000's of functions). + * + * It may make sense to exclude these tests when developing to save time. + */ + +#include "gtest/gtest.h" +#include "goalc/emitter/CodeTester.h" +#include "goalc/emitter/IGen.h" +// +using namespace emitter; + +TEST(EmitterSlow, xmm32_move) { + std::vector<u32> u32_constants = {0, INT32_MAX, UINT32_MAX, 17}; // element type spelled out: the mixed int/unsigned initialisers cannot be deduced + + // test moving between xmms (32-bit) and gprs. + CodeTester tester; + tester.init_code_buffer(512); + + for (auto constant : u32_constants) { + for (int r1 = 0; r1 < 16; r1++) { + if (r1 == RSP) { + continue; + } + for (int r2 = 0; r2 < 16; r2++) { + if (r2 == RSP) { + continue; + } + for (int r3 = 0; r3 < 16; r3++) { + for (int r4 = 0; r4 < 16; r4++) { + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + // move constant to gpr + tester.emit(IGen::mov_gpr64_u32(r1, constant)); + // move gpr to xmm + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + r3, r1)); + // move xmm to xmm + tester.emit(IGen::mov_xmm32_xmm32(XMM0 + r4, XMM0 + r3)); + // move xmm to gpr + tester.emit(IGen::movd_gpr32_xmm32(r2, XMM0 + r4)); + // return! 
+ tester.emit(IGen::mov_gpr64_gpr64(RAX, r2)); + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); // NOTE(review): the emitted function is never executed or asserted on in this loop; only emission is exercised + } + } + } + } + } +} diff --git a/test/test_emitter_xmm32.cpp b/test/test_emitter_xmm32.cpp new file mode 100644 index 0000000000..97b1651214 --- /dev/null +++ b/test/test_emitter_xmm32.cpp @@ -0,0 +1,659 @@ +#include "gtest/gtest.h" +#include "goalc/emitter/CodeTester.h" +#include "goalc/emitter/IGen.h" +#include "third-party/fmt/core.h" + +using namespace emitter; + +TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM3, RAX, RBX)); + EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 1c 03"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (int k = 0; k < 16; k++) { + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // fill k with junk + tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // load into k + tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64(XMM0 + k, i, j)); + // move to return + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + + // prepare the memory: + float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(tester.execute_ret<float>((u64)memory, 3 * sizeof(float), 0, 0), 3.45f); // result is read back as a float + EXPECT_EQ(tester.execute_ret<float>((u64)memory, 2 * sizeof(float), 0, 0), 1.23f); + EXPECT_EQ(tester.execute_ret<float>((u64)memory, 4 * sizeof(float), 0, 0), 5.67f); + EXPECT_EQ(tester.execute_ret<float>((u64)memory, 5 * sizeof(float), 0, 0), 0); + + iter++; + } + } + } +} + +TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RAX, RBX, -1)); + EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 5c 03 ff"); + + auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM3, RBX, RSI, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (int k = 0; k < 16; k++) { + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // fill k with junk + tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + // load into k + tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s8(XMM0 + k, i, j, -3)); + // move to return + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + + // prepare the memory: + float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) + 3, 0, 0), 3.45f); + EXPECT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) + 3, 0, 0), 1.23f); + EXPECT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) + 3, 0, 0), 5.67f); + EXPECT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) + 3, 0, 0), 0); + + iter++; + } + } + } +} + +TEST(EmitterXmm32, load32_xmm32_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RAX, RBX, -1)); + EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 10 9c 03 ff ff ff ff"); + + auto instr = IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM3, RBX, RSI, -1234); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (int k = 0; k < 16; k++) { + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); + + // fill k with junk + tester.emit(IGen::mov_gpr64_u64(i, (iter & 1) ? 0 : UINT64_MAX)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + + // pop args into appropriate register + tester.emit(IGen::pop_gpr64(i)); // i will have offset 0 + tester.emit(IGen::pop_gpr64(j)); // j will have offset 1 + + s64 offset = (iter & 1) ? INT32_MAX : INT32_MIN; + + // load into k + tester.emit(IGen::load32_xmm32_gpr64_plus_gpr64_plus_s32(XMM0 + k, i, j, offset)); + // move to return + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + + // prepare the memory: + float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + + // run! 
+ EXPECT_EQ(tester.execute_ret((u64)memory, 3 * sizeof(float) - offset, 0, 0), 3.45f); + EXPECT_EQ(tester.execute_ret((u64)memory, 2 * sizeof(float) - offset, 0, 0), 1.23f); + EXPECT_EQ(tester.execute_ret((u64)memory, 4 * sizeof(float) - offset, 0, 0), 5.67f); + EXPECT_EQ(tester.execute_ret((u64)memory, 5 * sizeof(float) - offset, 0, 0), 0); + iter++; + } + } + } +} + +namespace { +template +float as_float(T x) { + float result; + memcpy(&result, &x, sizeof(float)); + return result; +} + +u32 as_u32(float x) { + u32 result; + memcpy(&result, &x, 4); + return result; +} +} // namespace + +TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64) { + CodeTester tester; + tester.init_code_buffer(512); + tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(RAX, RBX, XMM7)); + EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 3c 03"); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + + for (int k = 0; k < 16; k++) { + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + // push args to the stack + + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value + + // pop value into addr1 GPR + tester.emit(IGen::pop_gpr64(i)); + // move to XMM + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + + // pop addrs + tester.emit(IGen::pop_gpr64(i)); + tester.emit(IGen::pop_gpr64(j)); + + // store + tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64(i, j, XMM0 + k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + + // prepare the memory: + float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 12, as_u32(1.234f), 0); + EXPECT_EQ(memory[2], 1.23f); + EXPECT_EQ(memory[3], 1.234f); + EXPECT_EQ(memory[4], 5.67f); + + iter++; + } + } + } +} + +TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s8) { + CodeTester tester; + tester.init_code_buffer(512); + tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RAX, RBX, XMM3, -1)); + EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 5c 03 ff"); + + auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(RBX, RSI, XMM3, -3); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(s8(buff[instr.offset_of_disp()]), -3); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (int k = 0; k < 16; k++) { + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value + + // pop value into addr1 GPR + tester.emit(IGen::pop_gpr64(i)); + // move to XMM + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + + // pop addrs + tester.emit(IGen::pop_gpr64(i)); + tester.emit(IGen::pop_gpr64(j)); + + s64 offset = (iter & 1) ? INT8_MAX : INT8_MIN; + + // load into k + tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s8(i, j, XMM0 + k, offset)); + + // move to return + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + + // prepare the memory: + float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); + EXPECT_EQ(memory[2], 1.23f); + EXPECT_EQ(memory[3], 1.234f); + EXPECT_EQ(memory[4], 5.67f); + + iter++; + } + } + } +} + +TEST(EmitterXmm32, store32_xmm32_gpr64_plus_gpr64_plus_s32) { + CodeTester tester; + tester.init_code_buffer(512); + tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RAX, RBX, XMM3, -1)); + EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 11 9c 03 ff ff ff ff"); + + auto instr = IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(RBX, RSI, XMM3, -1234); + u8 buff[256]; + instr.emit(buff); + EXPECT_EQ(*(s32*)(buff + instr.offset_of_disp()), -1234); + + int iter = 0; + for (int i = 0; i < 16; i++) { + if (i == RSP) { + continue; + } + for (int j = 0; j < 16; j++) { + if (j == RSP || j == i) { + continue; + } + for (int k = 0; k < 16; k++) { + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + // push args to the stack + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(1))); // addr2 + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(0))); // addr1 + tester.emit(IGen::push_gpr64(tester.get_c_abi_arg_reg(2))); // value + + // pop value into addr1 GPR + tester.emit(IGen::pop_gpr64(i)); + // move to XMM + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + k, i)); + + // pop addrs + tester.emit(IGen::pop_gpr64(i)); + tester.emit(IGen::pop_gpr64(j)); + + s64 offset = (iter & 1) ? INT32_MAX : INT32_MIN; + + // load into k + tester.emit(IGen::store32_xmm32_gpr64_plus_gpr64_plus_s32(i, j, XMM0 + k, offset)); + + // move to return + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + k)); + + // return! + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + + // prepare the memory: + float memory[8] = {0, 0, 1.23f, 3.45f, 5.67f, 0, 0, 0}; + + // run! 
+ tester.execute((u64)memory, 12 - offset, as_u32(1.234f), 0); + EXPECT_EQ(memory[2], 1.23f); + EXPECT_EQ(memory[3], 1.234f); + EXPECT_EQ(memory[4], 5.67f); + + iter++; + } + } + } +} + +TEST(EmitterXmm32, static_load_xmm32) { + CodeTester tester; + tester.init_code_buffer(512); + for (int i = 0; i < 16; i++) { + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + + auto loc_of_load = tester.size(); + auto load_instr = IGen::static_load_xmm32(XMM0 + i, INT32_MAX); + + tester.emit(load_instr); + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + auto loc_of_float = tester.emit_data(float(1.2345f)); + + // patch offset + tester.write(loc_of_float - loc_of_load - load_instr.length(), + loc_of_load + load_instr.offset_of_disp()); + + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, 1.2345f); + } +} + +TEST(EmitterXmm32, static_store_xmm32) { + CodeTester tester; + tester.init_code_buffer(512); + for (int i = 0; i < 16; i++) { + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, tester.get_c_abi_arg_reg(0))); + + auto loc_of_store = tester.size(); + auto store_instr = IGen::static_store_xmm32(XMM0 + i, INT32_MAX); + + tester.emit(store_instr); + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + auto loc_of_float = tester.emit_data(float(1.2345f)); + + tester.write(loc_of_float - loc_of_store - store_instr.length(), + loc_of_store + store_instr.offset_of_disp()); + tester.execute(as_u32(-44.567f), 0, 0, 0); + EXPECT_EQ(-44.567f, tester.read(loc_of_float)); + } +} + +TEST(EmitterXmm32, ucomiss) { + CodeTester tester; + tester.init_code_buffer(512); + tester.emit(IGen::cmp_flt_flt(XMM13, XMM14)); + EXPECT_EQ("45 0f 2e ee", tester.dump_to_hex_string()); +} + +TEST(EmitterXmm32, mul) { + CodeTester tester; + 
tester.init_code_buffer(512); + + std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + + for (auto f : vals) { + for (auto g : vals) { + for (int i = 0; i < 16; i++) { + for (int j = 0; j < 16; j++) { + if (i == j) { + continue; + } + auto expected = f * g; + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + u64 val = 0; + memcpy(&val, &f, sizeof(float)); + tester.emit(IGen::mov_gpr64_u64(RAX, val)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); + memcpy(&val, &g, sizeof(float)); + tester.emit(IGen::mov_gpr64_u64(RAX, val)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); + tester.emit(IGen::mulss_xmm_xmm(XMM0 + j, XMM0 + i)); + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(EmitterXmm32, div) { + CodeTester tester; + tester.init_code_buffer(512); + + std::vector vals = {1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + + for (auto f : vals) { + for (auto g : vals) { + for (int i = 0; i < 16; i++) { + for (int j = 0; j < 16; j++) { + if (i == j) { + continue; + } + auto expected = g / f; + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + u64 val = 0; + memcpy(&val, &f, sizeof(float)); + tester.emit(IGen::mov_gpr64_u64(RAX, val)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); + memcpy(&val, &g, sizeof(float)); + tester.emit(IGen::mov_gpr64_u64(RAX, val)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); + tester.emit(IGen::divss_xmm_xmm(XMM0 + j, XMM0 + i)); + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(EmitterXmm32, add) { + CodeTester tester; + 
tester.init_code_buffer(512); + + std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + for (auto f : vals) { + for (auto g : vals) { + for (int i = 0; i < 16; i++) { + for (int j = 0; j < 16; j++) { + if (i == j) { + continue; + } + auto expected = g + f; + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + u64 val = 0; + memcpy(&val, &f, sizeof(float)); + tester.emit(IGen::mov_gpr64_u64(RAX, val)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); + memcpy(&val, &g, sizeof(float)); + tester.emit(IGen::mov_gpr64_u64(RAX, val)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); + tester.emit(IGen::addss_xmm_xmm(XMM0 + j, XMM0 + i)); + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(EmitterXmm32, sub) { + CodeTester tester; + tester.init_code_buffer(512); + + std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, 7.545f}; + + for (auto f : vals) { + for (auto g : vals) { + for (int i = 0; i < 16; i++) { + for (int j = 0; j < 16; j++) { + if (i == j) { + continue; + } + auto expected = g - f; + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + u64 val = 0; + memcpy(&val, &f, sizeof(float)); + tester.emit(IGen::mov_gpr64_u64(RAX, val)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); + memcpy(&val, &g, sizeof(float)); + tester.emit(IGen::mov_gpr64_u64(RAX, val)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + j, RAX)); + tester.emit(IGen::subss_xmm_xmm(XMM0 + j, XMM0 + i)); + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + j)); + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } + } +} + +TEST(EmitterXmm32, float_to_int) { + CodeTester 
tester; + tester.init_code_buffer(512); + + std::vector vals = {0.f, 1.f, 0.2f, -1.f, 1235423.2f, -3457343.3f, + 7.545f, 0.1f, 0.9f, -0.1f, -0.9f}; + + for (auto g : vals) { + for (int i = 0; i < 16; i++) { + for (int j = 0; j < 16; j++) { + if (j == RSP) { + continue; + } + s32 expected = g; + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + u64 val = 0; + memcpy(&val, &g, sizeof(float)); + tester.emit(IGen::mov_gpr64_u64(RAX, val)); + tester.emit(IGen::movd_xmm32_gpr32(XMM0 + i, RAX)); + tester.emit(IGen::float_to_int32(j, XMM0 + i)); + tester.emit(IGen::mov_gpr64_gpr64(RAX, j)); + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } +} + +TEST(EmitterXmm32, int_to_float) { + CodeTester tester; + tester.init_code_buffer(512); + + std::vector vals = {0, 1, -1, INT32_MAX, -3457343, 7, INT32_MIN}; + + for (auto g : vals) { + for (int i = 0; i < 16; i++) { + for (int j = 0; j < 16; j++) { + if (j == RSP) { + continue; + } + float expected = g; + tester.clear(); + tester.emit_push_all_xmms(); + tester.emit_push_all_gprs(true); + tester.emit(IGen::mov_gpr64_u64(j, g)); + tester.emit(IGen::int32_to_float(XMM0 + i, j)); + tester.emit(IGen::movd_gpr32_xmm32(RAX, XMM0 + i)); + tester.emit_pop_all_gprs(true); + tester.emit_pop_all_xmms(); + tester.emit_return(); + auto result = tester.execute_ret(0, 0, 0, 0); + EXPECT_EQ(result, expected); + } + } + } +} \ No newline at end of file