[RV64_DYNAREC] Added more opcodes for vector #1830

Merged: 1 commit, Sep 16, 2024
184 changes: 184 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -197,6 +197,190 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VXOR_VV(q0, q1, v0, VECTOR_UNMASKED);
VSUB_VV(q0, v0, q0, VECTOR_UNMASKED);
break;
case 0x20:
INST_NAME("PMOVSXBW Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW8);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.5);
VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
VMV_V_V(q0, v0);
break;
case 0x21:
INST_NAME("PMOVSXBD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW8);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.25);
VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5);
VWADD_VX(v1, xZR, v0, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VMV_V_V(q0, v1);
break;
case 0x22:
INST_NAME("PMOVSXBQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW8);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.125);
VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25);
VWADD_VX(v1, xZR, v0, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
VWADD_VX(v0, xZR, v1, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
VMV_V_V(q0, v0);
break;
case 0x23:
INST_NAME("PMOVSXWD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5);
VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VMV_V_V(q0, v0);
break;
case 0x24:
INST_NAME("PMOVSXWQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25);
VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
VWADD_VX(v1, xZR, v0, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
VMV_V_V(q0, v1);
break;
case 0x25:
INST_NAME("PMOVSXDQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW32);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
VWADD_VX(v0, xZR, q1, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
VMV_V_V(q0, v0);
break;
case 0x30:
INST_NAME("PMOVZXBW Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW8);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.5);
VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
VMV_V_V(q0, v0);
break;
case 0x31:
INST_NAME("PMOVZXBD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW8);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.25);
VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5);
VWADDU_VX(v1, xZR, v0, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VMV_V_V(q0, v1);
break;
case 0x32:
INST_NAME("PMOVZXBQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW8);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 0.125);
VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25);
VWADDU_VX(v1, xZR, v0, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
VWADDU_VX(v0, xZR, v1, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
VMV_V_V(q0, v0);
break;
case 0x33:
INST_NAME("PMOVZXWD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.5);
VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VMV_V_V(q0, v0);
break;
case 0x34:
INST_NAME("PMOVZXWQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
fpu_get_scratch(dyn);
v1 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 0.25);
VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
VWADDU_VX(v1, xZR, v0, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
VMV_V_V(q0, v1);
break;
case 0x35:
INST_NAME("PMOVZXDQ Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_empty_vector(q0);
GETEX_vector(q1, 0, 0, VECTOR_SEW32);
fpu_get_scratch(dyn); // HACK: skip v3, for vector register group alignment!
v0 = fpu_get_scratch(dyn);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5);
VWADDU_VX(v0, xZR, q1, VECTOR_UNMASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
VMV_V_V(q0, v0);
break;
default:
DEFAULT_VECTOR;
}
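All the new cases follow one pattern: the x86 PMOVSX*/PMOVZX* instructions widen the low elements of Ex into Gx, and the RVV translation obtains each doubling of element width from a widening add against the zero register (VWADD_VX sign-extends, VWADDU_VX zero-extends), chained once per step (8 -> 16 -> 32 -> 64 bits). The fractional `multiple` argument to vector_vsetvli limits vl to the elements that still fit the 128-bit destination at the final width, and the throwaway fpu_get_scratch() calls keep each widening destination on an even register number, since a widening op writes a two-register group under its effective EMUL. As a reference for the intended semantics, a minimal scalar model of the BQ pair (illustrative only, not box64 code):

#include <stdint.h>
#include <stdio.h>

/* Scalar model of PMOVSXBQ/PMOVZXBQ: the low two bytes of the 128-bit
   source become two 64-bit lanes, sign- or zero-extended. */
static void pmovsxbq(int64_t dst[2], const uint8_t src[16]) {
    for (int i = 0; i < 2; i++)
        dst[i] = (int8_t)src[i]; /* sign-extend byte to 64 bits */
}

static void pmovzxbq(uint64_t dst[2], const uint8_t src[16]) {
    for (int i = 0; i < 2; i++)
        dst[i] = src[i]; /* zero-extend byte to 64 bits */
}

int main(void) {
    uint8_t xmm[16] = { 0x80, 0x7f }; /* -128 and 127 as signed bytes */
    int64_t s[2];
    uint64_t z[2];
    pmovsxbq(s, xmm);
    pmovzxbq(z, xmm);
    printf("sx: %lld %lld\n", (long long)s[0], (long long)s[1]); /* -128 127 */
    printf("zx: %llu %llu\n", (unsigned long long)z[0], (unsigned long long)z[1]); /* 128 127 */
    return 0;
}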
4 changes: 2 additions & 2 deletions src/dynarec/rv64/dynarec_rv64_helper.c
@@ -2594,7 +2594,7 @@ void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst)
}

// Simple wrapper for vsetvli
-int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, int multiple)
+int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, float multiple)
{
if (sew == VECTOR_SEWNA) return VECTOR_SEW8;
if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8;
@@ -2605,7 +2605,7 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, i
*
* mu tu sew lmul */
uint32_t vtypei = (0b0 << 7) | (0b0 << 6) | (sew << 3) | vlmul;
-    ADDI(s1, xZR, (16 >> sew) * multiple); // TODO: it's possible to reuse s1 sometimes
+    ADDI(s1, xZR, (int)((float)(16 >> sew) * multiple)); // TODO: it's possible to reuse s1 sometimes
VSETVLI(xZR, s1, vtypei);
return sew;
}
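Changing `multiple` from int to float is what allows the fractional vector lengths used above: at the source width, only half, a quarter, or an eighth of the 128-bit register holds elements that survive into the destination. A quick sketch of the resulting AVL, assuming the VECTOR_SEW* encodings are 0..3 for 8/16/32/64-bit elements (so 16 >> sew is the element count of a 128-bit register at that width):

#include <stdio.h>

/* Mirror of the AVL expression in vector_vsetvli above. */
static int avl(int sew, float multiple) {
    return (int)((float)(16 >> sew) * multiple);
}

int main(void) {
    printf("PMOVSXBW: avl=%d\n", avl(0, 0.5f));   /* 8 bytes widen to 8 words   */
    printf("PMOVSXBD: avl=%d\n", avl(0, 0.25f));  /* 4 bytes widen to 4 dwords  */
    printf("PMOVSXBQ: avl=%d\n", avl(0, 0.125f)); /* 2 bytes widen to 2 qwords  */
    printf("PMOVSXDQ: avl=%d\n", avl(2, 0.5f));   /* 2 dwords widen to 2 qwords */
    return 0;
}

With the old int parameter, an argument like 0.5 would have truncated to 0 before the multiply and produced a zero vector length, so every new opcode case in this PR depends on this signature change.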
2 changes: 1 addition & 1 deletion src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1442,7 +1442,7 @@ void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2
void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val);
void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup);

-int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, int multiple);
+int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, float multiple);

#if STEP < 2
#define CHECK_CACHE() 0