Skip to content

Commit

Permalink
Merge pull request ish-app#1926 from jason-conway/i-make-money-movs
Browse files Browse the repository at this point in the history
  • Loading branch information
tbodt authored Oct 23, 2022
2 parents 903919e + f76e46a commit 4d722ed
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 12 deletions.
32 changes: 26 additions & 6 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,18 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x11: TRACEI("movupd xmm, xmm:modrm");
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;
case 0x12: TRACEI("movlpd xmm, modrm");
READMODRM; V_OP(movl_p, modrm_val, xmm_modrm_reg,64); break;
case 0x13: TRACEI("movlpd modrm, xmm");
READMODRM; V_OP(movl_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x14: TRACEI("unpcklpd xmm, xmm:modrm");
READMODRM; V_OP(unpackl_pd, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x15: TRACEI("unpckhpd xmm, xmm:modrm");
READMODRM; V_OP(unpackh_pd, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x16: TRACEI("movhpd xmm, modrm");
READMODRM; V_OP(movh_p, modrm_val, xmm_modrm_reg,64); break;
case 0x17: TRACEI("movhpd modrm, xmm");
READMODRM; V_OP(movh_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x2e: TRACEI("ucomisd xmm, xmm:modrm");
READMODRM; V_OP(single_ucomi, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x2f: TRACEI("comisd xmm, xmm:modrm");
Expand Down Expand Up @@ -422,6 +429,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(mulu, xmm_modrm_val, xmm_modrm_reg, 128); break;
case 0xe6: TRACEI("cvttpd2dq xmm:modrm, xmm");
READMODRM; V_OP(cvttpd2dq, xmm_modrm_val, xmm_modrm_reg,64); break;
// movntdq is a store to memory only; there is no register-destination
// encoding. Decode it with READMODRM_MEM, matching the MMX movntq case,
// instead of accepting any modrm form as a plain 128-bit move.
case 0xe7: TRACEI("movntdq xmm, xmm:modrm");
READMODRM_MEM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;
case 0xe8: TRACEI("psubsb xmm:modrm, xmm");
READMODRM; V_OP(subss_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xe9: TRACEI("psubsw xmm:modrm, xmm");
Expand Down Expand Up @@ -465,14 +474,18 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x11: TRACEI("movups xmm, xmm:modrm");
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;

case 0x12: TRACEI("movlps xmm, modrm");
READMODRM; V_OP(movl_p, modrm_val, xmm_modrm_reg,64); break;
case 0x13: TRACEI("movlps modrm, xmm");
READMODRM; V_OP(movl_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x14: TRACEI("unpcklps xmm, xmm:modrm");
READMODRM; V_OP(unpackl_ps, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x15: TRACEI("unpckhps xmm, xmm:modrm");
READMODRM; V_OP(unpackh_ps, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x16: TRACEI("movlhps xmm, xmm:modrm");
READMODRM; V_OP(movlh_ps, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x16: TRACEI("movhps xmm, modrm");
READMODRM; V_OP(movh_p, modrm_val, xmm_modrm_reg,64); break;
case 0x17: TRACEI("movhps modrm, xmm");
READMODRM; V_OP(movh_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x2e: TRACEI("ucomiss xmm, xmm:modrm");
READMODRM; V_OP(single_ucomi, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x2f: TRACEI("comiss xmm, xmm:modrm");
Expand Down Expand Up @@ -530,7 +543,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {

case 0xe5: TRACEI("pmulhw mm:modrm, mm");
READMODRM; V_OP(mulu, mm_modrm_val, mm_modrm_reg,64); break;

case 0xe7: TRACEI("movntq mm, mm:modrm");
READMODRM_MEM; VMOV(mm_modrm_reg, mm_modrm_val,64); break;
case 0xef: TRACEI("pxor mm:modrm, mm");
READMODRM; V_OP(xor, mm_modrm_val, mm_modrm_reg,64); break;

Expand Down Expand Up @@ -1186,6 +1200,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(cvtsi2ss, modrm_val, xmm_modrm_reg,32); break;
case 0x2c: TRACEI("cvttss2si reg, xmm:modrm");
READMODRM; V_OP(cvttss2si, xmm_modrm_val, modrm_reg,32); break;
case 0x51: TRACEI("sqrtss xmm:modrm, xmm");
READMODRM; V_OP(single_fsqrt, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5a: TRACEI("cvtss2sd xmm:modrm, xmm");
READMODRM; V_OP(cvtss2sd, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5b: TRACEI("cvttps2dq xmm:modrm, xmm");
Expand All @@ -1197,8 +1213,12 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(single_fmul, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5c: TRACEI("subss xmm:modrm, xmm");
READMODRM; V_OP(single_fsub, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5d: TRACEI("minss xmm:modrm, xmm");
READMODRM; V_OP(single_fmin, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5e: TRACEI("divss xmm:modrm, xmm");
READMODRM; V_OP(single_fdiv, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5f: TRACEI("maxss xmm:modrm, xmm");
READMODRM; V_OP(single_fmax, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x6f: TRACEI("movdqu xmm:modrm, xmm");
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;

Expand Down
23 changes: 20 additions & 3 deletions emu/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,13 +381,20 @@ void vec_single_fdiv64(NO_CPU, const double *src, double *dst) { *dst /= *src; }
// Scalar single-precision divide: *dst is replaced by *dst / *src.
void vec_single_fdiv32(NO_CPU, const float *src, float *dst) {
    const float divisor = *src;
    *dst = *dst / divisor;
}

// Scalar double-precision square root: stores sqrt(*src) into *dst.
void vec_single_fsqrt64(NO_CPU, const double *src, double *dst) {
    const double root = sqrt(*src);
    *dst = root;
}
// Scalar single-precision square root: stores sqrtf(*src) into *dst.
void vec_single_fsqrt32(NO_CPU, const float *src, float *dst) {
    const float root = sqrtf(*src);
    *dst = root;
}

// Scalar double-precision max following the x86 scalar MAX operand rule:
// *dst is kept only when it compares strictly greater than *src. In every
// other case the source operand is the result: either operand NaN (the
// comparison is then false), or the operands compare equal, which includes
// +0.0 vs -0.0. Testing `*src > *dst` instead would keep *dst for the
// signed-zero case, which is why the condition is written from dst's side.
void vec_single_fmax64(NO_CPU, const double *src, double *dst) {
    if (!(*dst > *src)) {
        *dst = *src;
    }
}
// Scalar double-precision min following the x86 scalar MIN operand rule:
// *dst is kept only when it compares strictly less than *src. In every other
// case the source operand is the result: either operand NaN (the comparison
// is then false), or the operands compare equal, which includes +0.0 vs
// -0.0. Testing `*src < *dst` instead would keep *dst for the signed-zero
// case, which is why the condition is written from dst's side.
void vec_single_fmin64(NO_CPU, const double *src, double *dst) {
    if (!(*dst < *src)) {
        *dst = *src;
    }
}
// Scalar single-precision max following the x86 MAXSS operand rule: *dst is
// kept only when it compares strictly greater than *src. In every other case
// the source operand is the result: either operand NaN (the comparison is
// then false), or the operands compare equal, which includes +0.0 vs -0.0.
// Testing `*src > *dst` instead would keep *dst for the signed-zero case,
// which is why the condition is written from dst's side.
void vec_single_fmax32(NO_CPU, const float *src, float *dst) {
    if (!(*dst > *src)) {
        *dst = *src;
    }
}
// Scalar single-precision min following the x86 MINSS operand rule: *dst is
// kept only when it compares strictly less than *src. In every other case
// the source operand is the result: either operand NaN (the comparison is
// then false), or the operands compare equal, which includes +0.0 vs -0.0.
// Testing `*src < *dst` instead would keep *dst for the signed-zero case,
// which is why the condition is written from dst's side.
void vec_single_fmin32(NO_CPU, const float *src, float *dst) {
    if (!(*dst < *src)) {
        *dst = *src;
    }
}

void vec_single_ucomi32(struct cpu_state *cpu, const float *src, const float *dst) {
cpu->zf_res = cpu->pf_res = 0;
Expand Down Expand Up @@ -531,9 +538,6 @@ void vec_unpackh_pd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->f64[0] = dst->f64[1];
dst->f64[1] = src->f64[1];
}
// Copies the low quadword of src into the high quadword of dst. The low
// quadword of dst is not written.
void vec_movlh_ps128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    const uint64_t low_half = src->qw[0];
    dst->qw[1] = low_half;
}

void vec_packss_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->u32[0] = (satsw(dst->u16[0]) << 0x00) | (satsw(dst->u16[1]) << 0x08) |
Expand Down Expand Up @@ -622,6 +626,19 @@ void vec_fmovmask_d128(NO_CPU, const union xmm_reg *src, uint32_t *dst) {
}
}

// Writes the 64-bit value at src into the low quadword of dst. The high
// quadword of dst is not written.
void vec_movl_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst) {
    const uint64_t loaded = *src;
    dst->qw[0] = loaded;
}
// Stores the low quadword of src to the 64-bit location at dst.
void vec_movl_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst) {
    const uint64_t low_half = src->qw[0];
    *dst = low_half;
}
// Writes the 64-bit value at src into the high quadword of dst. The low
// quadword of dst is not written.
void vec_movh_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst) {
    const uint64_t loaded = *src;
    dst->qw[1] = loaded;
}
// Stores the high quadword of src to the 64-bit location at dst.
void vec_movh_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst) {
    const uint64_t high_half = src->qw[1];
    *dst = high_half;
}

// Reads one 16-bit lane of src, selected by index modulo 8, and stores it
// into the 32-bit location at dst.
void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t index) {
    const unsigned lane = index & 7u;
    *dst = src->u16[lane];
}
Expand Down
10 changes: 9 additions & 1 deletion emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,12 @@ void vec_single_fsub32(NO_CPU, const float *src, float *dst);
void vec_single_fdiv64(NO_CPU, const double *src, double *dst);
void vec_single_fdiv32(NO_CPU, const float *src, float *dst);
void vec_single_fsqrt64(NO_CPU, const double *src, double *dst);
void vec_single_fsqrt32(NO_CPU, const float *src, float *dst);

void vec_single_fmax64(NO_CPU, const double *src, double *dst);
void vec_single_fmax32(NO_CPU, const float *src, float *dst);
void vec_single_fmin64(NO_CPU, const double *src, double *dst);
void vec_single_fmin32(NO_CPU, const float *src, float *dst);
void vec_single_ucomi32(struct cpu_state *cpu, const float *src, const float *dst);
void vec_single_ucomi64(struct cpu_state *cpu, const double *src, const double *dst);
void vec_single_fcmp64(NO_CPU, const double *src, union xmm_reg *dst, uint8_t type);
Expand Down Expand Up @@ -124,7 +127,7 @@ void vec_unpackh_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_ps128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_pd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_movlh_ps128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);

void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
void vec_shuffle_hw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
Expand All @@ -136,6 +139,11 @@ void vec_compares_gtb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_compares_gtw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_compares_gtd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

// 64-bit half-register moves. The _p variants write the 64-bit value at src
// into the low (movl) or high (movh) quadword of the xmm register at dst,
// leaving the other quadword unwritten. The _pm variants store the low (movl)
// or high (movh) quadword of the xmm register at src to the 64-bit location
// at dst.
void vec_movl_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst);
void vec_movl_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst);
void vec_movh_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst);
void vec_movh_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst);

void vec_movmask_b128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
void vec_fmovmask_d128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t index);
Expand Down
30 changes: 30 additions & 0 deletions tests/e2e/qemu/expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4696,3 +4696,33 @@ cvttsd2si: a=c00b3333333333334004cccccccccccd r=00000002
cvttpd2dq: a=c00b3333333333334004cccccccccccd r=0000000000000000fffffffd00000002
cvtsi2ss : a=fffffffa r=0000000000000000fffffffdc0c00000
cvtsi2sd : a=fffffffa r=0000000000000000c018000000000000
movlps : a=456723c698694873 r=0000000000000000456723c698694873
movlps : a=007c62c2085427f8 r=0000000000000000007c62c2085427f8
movlps : a=dc515cff944a58ec456723c698694873 r=456723c698694873
movlps : a=231be9e8cde7438d007c62c2085427f8 r=007c62c2085427f8
movhps : a=dc515cff944a58ec r=dc515cff944a58ec007c62c2085427f8
movhps : a=231be9e8cde7438d r=231be9e8cde7438d007c62c2085427f8
movhps : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec
movhps : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d
movlpd : a=456723c698694873 r=231be9e8cde7438d456723c698694873
movlpd : a=007c62c2085427f8 r=231be9e8cde7438d007c62c2085427f8
movlpd : a=dc515cff944a58ec456723c698694873 r=456723c698694873
movlpd : a=231be9e8cde7438d007c62c2085427f8 r=007c62c2085427f8
movhpd : a=dc515cff944a58ec r=dc515cff944a58ec007c62c2085427f8
movhpd : a=231be9e8cde7438d r=231be9e8cde7438d007c62c2085427f8
movhpd : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec
movhpd : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d
movntq : a=456723c698694873 r=456723c698694873
movntq : a=007c62c2085427f8 r=007c62c2085427f8
movntdq : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec456723c698694873
movntdq : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d007c62c2085427f8
movups : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec456723c698694873
movups : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d007c62c2085427f8
movupd : a=dc515cff944a58ec456723c698694873 r=dc515cff944a58ec456723c698694873
movupd : a=231be9e8cde7438d007c62c2085427f8 r=231be9e8cde7438d007c62c2085427f8
minss : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=dc515cff944a58ec456723c698694873
minss : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8cde7438d007c62c2085427f8
maxss : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=dc515cff944a58ec456723c658bad7ab
maxss : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8cde7438d007c62c2085427f8
sqrtss : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=dc515cff944a58ec456723c64c1aa5bf
sqrtss : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8cde7438d007c62c223e90c9e
86 changes: 84 additions & 2 deletions tests/e2e/qemu/qemu-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2116,8 +2116,8 @@ static void test_enter(void)
#endif
#ifdef TEST_SSE

typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
typedef float __m128 __attribute__ ((__mode__(__V4SF__)));
typedef int __m64 __attribute__ ((vector_size (8)));
typedef float __m128 __attribute__ ((vector_size (16)));

typedef union {
double d[2];
Expand All @@ -2134,6 +2134,73 @@ static uint64_t __attribute__((aligned(16))) test_values[4][2] = {
{ 0x0f76255a085427f8, 0xc233e9e8c4c9439a },
};

/*
 * Runs one 64-bit half-register move `op` and prints operand and result.
 * Expects `a` and `r` to be in scope at the expansion site; r.q[] is zeroed
 * first.
 *   rm != 0: store form. `op` writes from the SSE register holding a.dq to a
 *            64-bit memory temporary; prints both halves of a and the stored
 *            value. `hi` is not used on this path.
 *   rm == 0: load form. a.q[hi] is copied to a memory temporary and `op`
 *            loads it into the SSE register bound to r.dq; prints the source
 *            value and both halves of r.
 * NOTE(review): the load form binds r.dq with a write-only "=x" constraint,
 * so the zeroed r is not an input to the asm. The half of r that `op` does
 * not write therefore looks unspecified (the recorded movhps/movlpd results
 * in expected.txt show non-zero data in that half). A "+x" constraint would
 * probably make the result deterministic -- confirm, and regenerate
 * expected.txt if this is changed.
 */
#define MOV_OP(op, hi, rm)\
{\
r.q[0] = r.q[1] = 0;\
if (rm) {\
uint64_t mem;\
asm volatile (#op " %1, %0" : "=m" (mem) : "x" (a.dq));\
printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "\n", #op, a.q[1], a.q[0], mem);\
} else {\
uint64_t mem = a.q[hi];\
asm volatile (#op " %1, %0" : "=x" (r.dq) : "m" (mem));\
printf("%-9s: a=" FMT64X " r=" FMT64X "" FMT64X "\n", #op, mem, r.q[1], r.q[0]);\
}\
}
/*
 * Runs MOV_OP(op, hi, rm) twice: once with `a` loaded from test_values[0]
 * and once with `a` loaded from test_values[2] (index 2*i for i = 0, 1).
 */
#define MOV_OP_REGMEM(op, hi, rm)\
{\
int i;\
for(i=0;i<2;i++) {\
a.q[0] = test_values[2*i][0];\
a.q[1] = test_values[2*i][1];\
MOV_OP(op, hi, rm);\
}\
}
/*
 * Exercises a low-quadword move `op` in both directions: first the load form
 * (rm = 0, source taken from a.q[0]), then the store form (rm = 1).
 */
#define MOVL_OP2(op)\
{\
MOV_OP_REGMEM(op, 0, 0);\
MOV_OP_REGMEM(op, 0, 1);\
}
/*
 * Exercises a high-quadword move `op` in both directions: first the load form
 * (rm = 0, source taken from a.q[1]), then the store form (rm = 1).
 */
#define MOVH_OP2(op)\
{\
MOV_OP_REGMEM(op, 1, 0);\
MOV_OP_REGMEM(op, 1, 1);\
}
/*
 * Runs one store instruction `op` into `r` (zeroed first) and prints the
 * source and the stored result. Expects `a` and `r` in scope.
 *   quad != 0: 128-bit store from the SSE register ("x") holding a.dq into
 *              r.dq as a memory operand; prints both halves of a and r.
 *   quad == 0: 64-bit store from an MMX register ("y") holding a.q[0] into
 *              r.q[0] as a memory operand; prints a.q[0] and r.q[0].
 */
#define MOVNT_OP(op, quad)\
{\
r.q[0] = r.q[1] = 0;\
if (quad) {\
asm volatile (#op " %1, %0" : "=m" (r.dq) : "x" (a.dq));\
printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n", #op, a.q[1], a.q[0], r.q[1], r.q[0]);\
} else {\
asm volatile (#op " %1, %0" : "=m" (r.q[0]) : "y" (a.q[0]));\
printf("%-9s: a=" FMT64X " r=" FMT64X "\n", #op, a.q[0], r.q[0]);\
}\
}
/*
 * Runs MOVNT_OP(op, quad) twice: once with `a` loaded from test_values[0]
 * and once with `a` loaded from test_values[2] (index 2*i for i = 0, 1).
 */
#define MOVNT_OP2(op,quad)\
{\
int i;\
for(i=0;i<2;i++) {\
a.q[0] = test_values[2*i][0];\
a.q[1] = test_values[2*i][1];\
MOVNT_OP(op, quad);\
}\
}
/*
 * Runs a full 128-bit move `op` from a.dq to r.dq and prints both halves of
 * each. Both operands use the "x" (SSE register) constraint, so only the
 * register-to-register form of `op` is exercised here. Expects `a` and `r`
 * in scope.
 */
#define MOVU_OP(op)\
{\
asm volatile (#op " %1, %0" : "=x" (r.dq) : "x" (a.dq));\
printf("%-9s: a=" FMT64X "" FMT64X " r=" FMT64X "" FMT64X "\n",#op, a.q[1], a.q[0], r.q[1], r.q[0]);\
}
/*
 * Runs MOVU_OP(op) twice: once with `a` loaded from test_values[0] and once
 * with `a` loaded from test_values[2] (index 2*i for i = 0, 1).
 */
#define MOVU_OP2(op)\
{\
int i;\
for(i=0;i<2;i++) {\
a.q[0] = test_values[2*i][0];\
a.q[1] = test_values[2*i][1];\
MOVU_OP(op);\
}\
}

#define SSE_OP(op)\
{\
asm volatile (#op " %2, %0" : "=x" (r.dq) : "0" (a.dq), "x" (b.dq));\
Expand Down Expand Up @@ -2701,6 +2768,21 @@ void test_sse(void)
// CVT_OP_XMM(cvtdq2ps);
// CVT_OP_XMM(cvtdq2pd);

/* sse/sse2 moves */
MOVL_OP2(movlps);
MOVH_OP2(movhps);
MOVL_OP2(movlpd);
MOVH_OP2(movhpd);
MOVNT_OP2(movntq, 0);
MOVNT_OP2(movntdq, 1);
MOVU_OP2(movups);
MOVU_OP2(movupd);

/* misc sse ops*/
SSE_OP2(minss);
SSE_OP2(maxss);
SSE_OP2(sqrtss);

/* XXX: test PNI insns */
#if 0
SSE_OP2(movshdup);
Expand Down

0 comments on commit 4d722ed

Please sign in to comment.