Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

legalize instructions #1926

Merged
merged 3 commits into from
Oct 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,18 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x11: TRACEI("movupd xmm, xmm:modrm");
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;
case 0x12: TRACEI("movlpd xmm, modrm");
READMODRM; V_OP(movl_p, modrm_val, xmm_modrm_reg,64); break;
case 0x13: TRACEI("movlpd modrm, xmm");
READMODRM; V_OP(movl_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x14: TRACEI("unpcklpd xmm, xmm:modrm");
READMODRM; V_OP(unpackl_pd, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x15: TRACEI("unpckhpd xmm, xmm:modrm");
READMODRM; V_OP(unpackh_pd, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x16: TRACEI("movhpd xmm, modrm");
READMODRM; V_OP(movh_p, modrm_val, xmm_modrm_reg,64); break;
case 0x17: TRACEI("movhpd modrm, xmm");
READMODRM; V_OP(movh_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x2e: TRACEI("ucomisd xmm, xmm:modrm");
READMODRM; V_OP(single_ucomi, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x2f: TRACEI("comisd xmm, xmm:modrm");
Expand Down Expand Up @@ -422,6 +429,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(mulu, xmm_modrm_val, xmm_modrm_reg, 128); break;
case 0xe6: TRACEI("cvttpd2dq xmm:modrm, xmm");
READMODRM; V_OP(cvttpd2dq, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0xe7: TRACEI("movntdq xmm, xmm:modrm");
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;
case 0xe8: TRACEI("psubsb xmm:modrm, xmm");
READMODRM; V_OP(subss_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xe9: TRACEI("psubsw xmm:modrm, xmm");
Expand Down Expand Up @@ -465,14 +474,18 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x11: TRACEI("movups xmm, xmm:modrm");
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;

case 0x12: TRACEI("movlps xmm, modrm");
READMODRM; V_OP(movl_p, modrm_val, xmm_modrm_reg,64); break;
case 0x13: TRACEI("movlps modrm, xmm");
READMODRM; V_OP(movl_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x14: TRACEI("unpcklps xmm, xmm:modrm");
READMODRM; V_OP(unpackl_ps, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x15: TRACEI("unpckhps xmm, xmm:modrm");
READMODRM; V_OP(unpackh_ps, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x16: TRACEI("movlhps xmm, xmm:modrm");
READMODRM; V_OP(movlh_ps, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x16: TRACEI("movhps xmm, modrm");
READMODRM; V_OP(movh_p, modrm_val, xmm_modrm_reg,64); break;
case 0x17: TRACEI("movhps modrm, xmm");
READMODRM; V_OP(movh_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x2e: TRACEI("ucomiss xmm, xmm:modrm");
READMODRM; V_OP(single_ucomi, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x2f: TRACEI("comiss xmm, xmm:modrm");
Expand Down Expand Up @@ -530,7 +543,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {

case 0xe5: TRACEI("pmulhw mm:modrm, mm");
READMODRM; V_OP(mulu, mm_modrm_val, mm_modrm_reg,64); break;

case 0xe7: TRACEI("movntq mm, mm:modrm");
READMODRM_MEM; VMOV(mm_modrm_reg, mm_modrm_val,64); break;
case 0xef: TRACEI("pxor mm:modrm, mm");
READMODRM; V_OP(xor, mm_modrm_val, mm_modrm_reg,64); break;

Expand Down Expand Up @@ -1186,6 +1200,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(cvtsi2ss, modrm_val, xmm_modrm_reg,32); break;
case 0x2c: TRACEI("cvttss2si reg, xmm:modrm");
READMODRM; V_OP(cvttss2si, xmm_modrm_val, modrm_reg,32); break;
case 0x51: TRACEI("sqrtss xmm:modrm, xmm");
READMODRM; V_OP(single_fsqrt, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5a: TRACEI("cvtss2sd xmm:modrm, xmm");
READMODRM; V_OP(cvtss2sd, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5b: TRACEI("cvttps2dq xmm:modrm, xmm");
Expand All @@ -1197,8 +1213,12 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(single_fmul, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5c: TRACEI("subss xmm:modrm, xmm");
READMODRM; V_OP(single_fsub, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5d: TRACEI("minss xmm:modrm, xmm");
READMODRM; V_OP(single_fmin, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5e: TRACEI("divss xmm:modrm, xmm");
READMODRM; V_OP(single_fdiv, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5f: TRACEI("maxss xmm:modrm, xmm");
READMODRM; V_OP(single_fmax, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x6f: TRACEI("movdqu xmm:modrm, xmm");
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;

Expand Down
23 changes: 20 additions & 3 deletions emu/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,13 +381,20 @@ void vec_single_fdiv64(NO_CPU, const double *src, double *dst) { *dst /= *src; }
/* Scalar single-precision divide: replaces *dst with *dst divided by *src. */
void vec_single_fdiv32(NO_CPU, const float *src, float *dst) {
    const float divisor = *src;
    *dst = *dst / divisor;
}

/* Scalar double-precision square root: writes sqrt(*src) into *dst. */
void vec_single_fsqrt64(NO_CPU, const double *src, double *dst) {
    const double radicand = *src;
    *dst = sqrt(radicand);
}
/* Scalar single-precision square root: writes sqrtf(*src) into *dst. */
void vec_single_fsqrt32(NO_CPU, const float *src, float *dst) {
    const float radicand = *src;
    *dst = sqrtf(radicand);
}

/*
 * Scalar double-precision maximum following the x86 MAXSD operand rules.
 *
 * src: the instruction's source (second) operand.
 * dst: the destination (first) operand, updated in place.
 *
 * *dst keeps its value only when it compares strictly greater than *src.
 * In every other case -- *src is larger, either operand is NaN, or the two
 * compare equal (which is how +0.0 vs -0.0 shows up) -- *src is written to
 * *dst, so the source operand wins ties exactly as the hardware does.
 */
void vec_single_fmax64(NO_CPU, const double *src, double *dst) {
    /* Any comparison with a NaN is false, so the negated test also covers
     * the NaN cases without explicit isnan() checks. */
    if (!(*dst > *src))
        *dst = *src;
}
/*
 * Scalar double-precision minimum following the x86 MINSD operand rules.
 *
 * src: the instruction's source (second) operand.
 * dst: the destination (first) operand, updated in place.
 *
 * *dst keeps its value only when it compares strictly less than *src.
 * In every other case -- *src is smaller, either operand is NaN, or the two
 * compare equal (which is how +0.0 vs -0.0 shows up) -- *src is written to
 * *dst, so the source operand wins ties exactly as the hardware does.
 */
void vec_single_fmin64(NO_CPU, const double *src, double *dst) {
    /* Any comparison with a NaN is false, so the negated test also covers
     * the NaN cases without explicit isnan() checks. */
    if (!(*dst < *src))
        *dst = *src;
}
/*
 * Scalar single-precision maximum following the x86 MAXSS operand rules.
 *
 * src: the instruction's source (second) operand.
 * dst: the destination (first) operand, updated in place.
 *
 * *dst keeps its value only when it compares strictly greater than *src.
 * In every other case -- *src is larger, either operand is NaN, or the two
 * compare equal (which is how +0.0 vs -0.0 shows up) -- *src is written to
 * *dst, so the source operand wins ties exactly as the hardware does.
 */
void vec_single_fmax32(NO_CPU, const float *src, float *dst) {
    /* Any comparison with a NaN is false, so the negated test also covers
     * the NaN cases without explicit isnan() checks. */
    if (!(*dst > *src))
        *dst = *src;
}
/*
 * Scalar single-precision minimum following the x86 MINSS operand rules.
 *
 * src: the instruction's source (second) operand.
 * dst: the destination (first) operand, updated in place.
 *
 * *dst keeps its value only when it compares strictly less than *src.
 * In every other case -- *src is smaller, either operand is NaN, or the two
 * compare equal (which is how +0.0 vs -0.0 shows up) -- *src is written to
 * *dst, so the source operand wins ties exactly as the hardware does.
 */
void vec_single_fmin32(NO_CPU, const float *src, float *dst) {
    /* Any comparison with a NaN is false, so the negated test also covers
     * the NaN cases without explicit isnan() checks. */
    if (!(*dst < *src))
        *dst = *src;
}

void vec_single_ucomi32(struct cpu_state *cpu, const float *src, const float *dst) {
cpu->zf_res = cpu->pf_res = 0;
Expand Down Expand Up @@ -531,9 +538,6 @@ void vec_unpackh_pd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->f64[0] = dst->f64[1];
dst->f64[1] = src->f64[1];
}
/* Copies src->qw[0] into dst->qw[1]; dst->qw[0] is not written. */
void vec_movlh_ps128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
dst->qw[1] = src->qw[0];
}

void vec_packss_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->u32[0] = (satsw(dst->u16[0]) << 0x00) | (satsw(dst->u16[1]) << 0x08) |
Expand Down Expand Up @@ -622,6 +626,19 @@ void vec_fmovmask_d128(NO_CPU, const union xmm_reg *src, uint32_t *dst) {
}
}

/* Load form of the low-half move: stores the 64-bit value at src into
 * dst->qw[0]. dst->qw[1] is not written. */
void vec_movl_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst) {
    const uint64_t incoming = *src;
    dst->qw[0] = incoming;
}
/* Store form of the low-half move: copies src->qw[0] out to the 64-bit
 * location at dst. */
void vec_movl_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst) {
    const uint64_t low_half = src->qw[0];
    *dst = low_half;
}
/* Load form of the high-half move: stores the 64-bit value at src into
 * dst->qw[1]. dst->qw[0] is not written. */
void vec_movh_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst) {
    const uint64_t incoming = *src;
    dst->qw[1] = incoming;
}
/* Store form of the high-half move: copies src->qw[1] out to the 64-bit
 * location at dst. */
void vec_movh_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst) {
    const uint64_t high_half = src->qw[1];
    *dst = high_half;
}

/* Reads one 16-bit lane of src into *dst. Only the low three bits of index
 * select the lane, so the lane number always falls in 0..7. */
void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t index) {
    const unsigned lane = index & 7u;
    *dst = src->u16[lane];
}
Expand Down
10 changes: 9 additions & 1 deletion emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,12 @@ void vec_single_fsub32(NO_CPU, const float *src, float *dst);
/* Scalar divide: *dst is divided by *src in place. */
void vec_single_fdiv64(NO_CPU, const double *src, double *dst);
void vec_single_fdiv32(NO_CPU, const float *src, float *dst);
/* Scalar square root: the square root of *src is stored in *dst. */
void vec_single_fsqrt64(NO_CPU, const double *src, double *dst);
void vec_single_fsqrt32(NO_CPU, const float *src, float *dst);

/* Scalar max/min: leaves the larger (fmax) or smaller (fmin) of *src and
 * *dst in *dst. *src is stored whenever either operand is NaN. */
void vec_single_fmax64(NO_CPU, const double *src, double *dst);
void vec_single_fmax32(NO_CPU, const float *src, float *dst);
void vec_single_fmin64(NO_CPU, const double *src, double *dst);
void vec_single_fmin32(NO_CPU, const float *src, float *dst);
void vec_single_ucomi32(struct cpu_state *cpu, const float *src, const float *dst);
void vec_single_ucomi64(struct cpu_state *cpu, const double *src, const double *dst);
void vec_single_fcmp64(NO_CPU, const double *src, union xmm_reg *dst, uint8_t type);
Expand Down Expand Up @@ -124,7 +127,7 @@ void vec_unpackh_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_ps128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_pd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_movlh_ps128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);

void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
void vec_shuffle_hw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
Expand All @@ -136,6 +139,11 @@ void vec_compares_gtb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_compares_gtw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_compares_gtd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

/* 64-bit half moves. movl_* work on qw[0] of the xmm register, movh_* on
 * qw[1]. The _p64 forms store *src into that half of dst and leave the other
 * half unwritten; the _pm64 forms copy that half of src out to *dst. */
void vec_movl_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst);
void vec_movl_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst);
void vec_movh_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst);
void vec_movh_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst);

void vec_movmask_b128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
void vec_fmovmask_d128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t index);
Expand Down
30 changes: 30 additions & 0 deletions tests/e2e/qemu/expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4696,3 +4696,33 @@ cvttsd2si: a=c00b3333333333334004cccccccccccd r=00000002
cvttpd2dq: a=c00b3333333333334004cccccccccccd r=0000000000000000fffffffd00000002
cvtsi2ss : a=fffffffa r=0000000000000000fffffffdc0c00000
cvtsi2sd : a=fffffffa r=0000000000000000c018000000000000
movlps : a=456723c698694873 r=0000000000000000456723c698694873
movlps : a=007c62c2085427f8 r=0000000000000000007c62c2085427f8
movlps : a=dc515cff944a58ec456723c698694873 r=456723c698694873
movlps : a=231be9e8cde7438d007c62c2085427f8 r=007c62c2085427f8
movhps : a=dc515cff944a58ec r=dc515cff944a58ec007c62c2085427f8
movhps : a=231be9e8cde7438d r=231be9e8cde7438d007c62c2085427f8
movhps : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec
movhps : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d
movlpd : a=456723c698694873 r=231be9e8cde7438d456723c698694873
movlpd : a=007c62c2085427f8 r=231be9e8cde7438d007c62c2085427f8
movlpd : a=dc515cff944a58ec456723c698694873 r=456723c698694873
movlpd : a=231be9e8cde7438d007c62c2085427f8 r=007c62c2085427f8
movhpd : a=dc515cff944a58ec r=dc515cff944a58ec007c62c2085427f8
movhpd : a=231be9e8cde7438d r=231be9e8cde7438d007c62c2085427f8
movhpd : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec
movhpd : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d
movntq : a=456723c698694873 r=456723c698694873
movntq : a=007c62c2085427f8 r=007c62c2085427f8
movntdq : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec456723c698694873
movntdq : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d007c62c2085427f8
movups : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec456723c698694873
movups : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d007c62c2085427f8
movupd : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec456723c698694873
movupd : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d007c62c2085427f8
minss : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=dc515cff944a58ec456723c698694873
minss : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8cde7438d007c62c2085427f8
maxss : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=dc515cff944a58ec456723c658bad7ab
maxss : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8cde7438d007c62c2085427f8
sqrtss : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=dc515cff944a58ec456723c64c1aa5bf
sqrtss : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8cde7438d007c62c223e90c9e
86 changes: 84 additions & 2 deletions tests/e2e/qemu/qemu-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2116,8 +2116,8 @@ static void test_enter(void)
#endif
#ifdef TEST_SSE

typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
typedef float __m128 __attribute__ ((__mode__(__V4SF__)));
typedef int __m64 __attribute__ ((vector_size (8)));
typedef float __m128 __attribute__ ((vector_size (16)));

typedef union {
double d[2];
Expand All @@ -2134,6 +2134,73 @@ static uint64_t __attribute__((aligned(16))) test_values[4][2] = {
{ 0x0f76255a085427f8, 0xc233e9e8c4c9439a },
};

/*
 * Runs one 64-bit half-move instruction `op` once and prints its operand and
 * result. Relies on variables `a` and `r` from the expansion site; both are
 * accessed through .q[] and .dq.
 *   rm != 0: store form. `op` moves from the register holding a.dq into a
 *            local 64-bit memory slot, which is printed as r. `hi` is unused.
 *   rm == 0: load form. a.q[hi] is copied into a local memory slot and `op`
 *            moves it into the register bound to r.dq.
 * NOTE(review): r.dq is bound with the output-only "=x" constraint, so the
 * zeroing of r.q[] at the top is not an input to the asm. The half of r that
 * `op` does not write appears to be whatever the chosen register held
 * before -- confirm whether "+x" was intended.
 */
#define MOV_OP(op, hi, rm)\
{\
r.q[0] = r.q[1] = 0;\
if (rm) {\
uint64_t mem;\
asm volatile (#op " %1, %0" : "=m" (mem) : "x" (a.dq));\
printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "\n", #op, a.q[1], a.q[0], mem);\
} else {\
uint64_t mem = a.q[hi];\
asm volatile (#op " %1, %0" : "=x" (r.dq) : "m" (mem));\
printf("%-9s: a=" FMT64X " r=" FMT64X "" FMT64X "\n", #op, mem, r.q[1], r.q[0]);\
}\
}
/*
 * Runs MOV_OP(op, hi, rm) twice, first with `a` loaded from test_values[0]
 * and then from test_values[2] (element [0] into a.q[0], element [1] into
 * a.q[1]).
 */
#define MOV_OP_REGMEM(op, hi, rm)\
{\
int i;\
for(i=0;i<2;i++) {\
a.q[0] = test_values[2*i][0];\
a.q[1] = test_values[2*i][1];\
MOV_OP(op, hi, rm);\
}\
}
/*
 * Exercises `op` with hi = 0: first the load form (rm = 0), then the store
 * form (rm = 1), each over both test vectors used by MOV_OP_REGMEM.
 */
#define MOVL_OP2(op)\
{\
MOV_OP_REGMEM(op, 0, 0);\
MOV_OP_REGMEM(op, 0, 1);\
}
/*
 * Exercises `op` with hi = 1: first the load form (rm = 0), then the store
 * form (rm = 1), each over both test vectors used by MOV_OP_REGMEM.
 */
#define MOVH_OP2(op)\
{\
MOV_OP_REGMEM(op, 1, 0);\
MOV_OP_REGMEM(op, 1, 1);\
}
/*
 * Runs one register-to-memory store instruction `op` and prints source and
 * result. Relies on variables `a` and `r` from the expansion site. r.q[] is
 * zeroed first; since r is the memory destination, that zeroing is visible
 * in any part `op` leaves unwritten.
 *   quad != 0: a.dq is passed in an "x"-constrained register and stored to
 *              r.dq in memory; both are printed as two 64-bit halves.
 *   quad == 0: a.q[0] is passed in a "y"-constrained register (MMX register
 *              class in GCC's x86 constraints) and stored to r.q[0]; only
 *              the 64-bit values are printed.
 */
#define MOVNT_OP(op, quad)\
{\
r.q[0] = r.q[1] = 0;\
if (quad) {\
asm volatile (#op " %1, %0" : "=m" (r.dq) : "x" (a.dq));\
printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n", #op, a.q[1], a.q[0], r.q[1], r.q[0]);\
} else {\
asm volatile (#op " %1, %0" : "=m" (r.q[0]) : "y" (a.q[0]));\
printf("%-9s: a=" FMT64X " r=" FMT64X "\n", #op, a.q[0], r.q[0]);\
}\
}
/*
 * Runs MOVNT_OP(op, quad) twice, first with `a` loaded from test_values[0]
 * and then from test_values[2].
 */
#define MOVNT_OP2(op,quad)\
{\
int i;\
for(i=0;i<2;i++) {\
a.q[0] = test_values[2*i][0];\
a.q[1] = test_values[2*i][1];\
MOVNT_OP(op, quad);\
}\
}
/*
 * Runs the register-to-register form of `op`, with a.dq as the "x" input and
 * r.dq as the "=x" output, then prints both as two 64-bit halves. Relies on
 * variables `a` and `r` from the expansion site.
 */
#define MOVU_OP(op)\
{\
asm volatile (#op " %1, %0" : "=x" (r.dq) : "x" (a.dq));\
printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",#op, a.q[1], a.q[0], r.q[1], r.q[0]);\
}
/*
 * Runs MOVU_OP(op) twice, first with `a` loaded from test_values[0] and then
 * from test_values[2].
 */
#define MOVU_OP2(op)\
{\
int i;\
for(i=0;i<2;i++) {\
a.q[0] = test_values[2*i][0];\
a.q[1] = test_values[2*i][1];\
MOVU_OP(op);\
}\
}

#define SSE_OP(op)\
{\
asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
Expand Down Expand Up @@ -2701,6 +2768,21 @@ void test_sse(void)
// CVT_OP_XMM(cvtdq2ps);
// CVT_OP_XMM(cvtdq2pd);

/* sse/sse2 moves */
MOVL_OP2(movlps);
MOVH_OP2(movhps);
MOVL_OP2(movlpd);
MOVH_OP2(movhpd);
MOVNT_OP2(movntq, 0);
MOVNT_OP2(movntdq, 1);
MOVU_OP2(movups);
MOVU_OP2(movupd);

/* misc sse ops */
SSE_OP2(minss);
SSE_OP2(maxss);
SSE_OP2(sqrtss);

/* XXX: test PNI insns */
#if 0
SSE_OP2(movshdup);
Expand Down