Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RV64_DYNAREC] Refactored vector SEW tracking #1820

Merged
merged 1 commit into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/dynarec/arm64/dynarec_arm64_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ typedef struct instruction_arm64_s {
uint16_t ymm0_in; // bitmap of ymm to zero at purge
uint16_t ymm0_add; // the ymm0 added by the opcode
uint16_t ymm0_sub; // the ymm0 removed by the opcode
uint16_t ymm0_out; // the ymmm0 at th end of the opcode
uint16_t ymm0_out; // the ymm0 at the end of the opcode
uint16_t ymm0_pass2, ymm0_pass3;
uint8_t barrier_maybe;
uint8_t will_write;
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/la64/dynarec_la64_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ typedef struct instruction_la64_s {
uint16_t ymm0_in; // bitmap of ymm to zero at purge
uint16_t ymm0_add; // the ymm0 added by the opcode
uint16_t ymm0_sub; // the ymm0 removed by the opcode
uint16_t ymm0_out; // the ymmm0 at th end of the opcode
uint16_t ymm0_out; // the ymm0 at the end of the opcode
uint16_t ymm0_pass2, ymm0_pass3;
uint8_t barrier_maybe;
uint8_t will_write;
Expand Down
12 changes: 7 additions & 5 deletions src/dynarec/rv64/dynarec_rv64_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -379,11 +379,11 @@ int sewNeedsTransform(dynarec_rv64_t* dyn, int ninst)
{
int i2 = dyn->insts[ninst].x64.jmp_insts;

if (dyn->insts[i2].vector_sew == VECTOR_SEWNA)
if (dyn->insts[i2].vector_sew_entry == VECTOR_SEWNA)
return 0;
else if (dyn->insts[i2].vector_sew == VECTOR_SEWANY && dyn->insts[ninst].vector_sew != VECTOR_SEWNA)
else if (dyn->insts[i2].vector_sew_entry == VECTOR_SEWANY && dyn->insts[ninst].vector_sew_exit != VECTOR_SEWNA)
return 0;
else if (dyn->insts[i2].vector_sew == dyn->insts[ninst].vector_sew)
else if (dyn->insts[i2].vector_sew_entry == dyn->insts[ninst].vector_sew_exit)
return 0;

return 1;
Expand Down Expand Up @@ -616,7 +616,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
};
if(box64_dynarec_dump) {
printf_x64_instruction(rex.is32bits?my_context->dec32:my_context->dec, &dyn->insts[ninst].x64, name);
dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d, sew=%d",
dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d, sew@entry=%d, sew@exit=%d",
(box64_dynarec_dump > 1) ? "\e[32m" : "",
(void*)(dyn->native_start + dyn->insts[ninst].address),
dyn->insts[ninst].size / 4,
Expand All @@ -631,7 +631,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
dyn->insts[ninst].x64.use_flags,
dyn->insts[ninst].x64.need_before,
dyn->insts[ninst].x64.need_after,
dyn->smread, dyn->smwrite, dyn->insts[ninst].vector_sew);
dyn->smread, dyn->smwrite, dyn->insts[ninst].vector_sew_entry, dyn->insts[ninst].vector_sew_exit);
if(dyn->insts[ninst].pred_sz) {
dynarec_log(LOG_NONE, ", pred=");
for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)
Expand Down Expand Up @@ -722,6 +722,7 @@ void fpu_reset(dynarec_rv64_t* dyn)
mmx_reset(&dyn->e);
sse_reset(&dyn->e);
fpu_reset_reg(dyn);
dyn->vector_sew = VECTOR_SEWNA;
}

void fpu_reset_ninst(dynarec_rv64_t* dyn, int ninst)
Expand All @@ -730,6 +731,7 @@ void fpu_reset_ninst(dynarec_rv64_t* dyn, int ninst)
mmx_reset(&dyn->insts[ninst].e);
sse_reset(&dyn->insts[ninst].e);
fpu_reset_reg_extcache(&dyn->insts[ninst].e);
dyn->vector_sew = VECTOR_SEWNA;
}

int fpu_is_st_freed(dynarec_rv64_t* dyn, int ninst, int st)
Expand Down
10 changes: 5 additions & 5 deletions src/dynarec/rv64/dynarec_rv64_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -2432,9 +2432,9 @@ static void sewTransform(dynarec_rv64_t* dyn, int ninst, int s1)
int j64;
int jmp = dyn->insts[ninst].x64.jmp_insts;
if (jmp < 0) return;
if (dyn->insts[jmp].vector_sew == VECTOR_SEWNA) return;
MESSAGE(LOG_DUMP, "\tSEW changed to %d ---- ninst=%d -> %d\n", dyn->insts[jmp].vector_sew, ninst, jmp);
vector_vsetvl_emul1(dyn, ninst, s1, dyn->insts[jmp].vector_sew, 1);
if (dyn->insts[jmp].vector_sew_entry == VECTOR_SEWNA) return;
MESSAGE(LOG_DUMP, "\tSEW changed to %d ---- ninst=%d -> %d\n", dyn->insts[jmp].vector_sew_entry, ninst, jmp);
vector_vsetvl_emul1(dyn, ninst, s1, dyn->insts[jmp].vector_sew_entry, 1);
}

void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3)
Expand Down Expand Up @@ -2528,10 +2528,10 @@ void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n)
#if STEP > 1
// for STEP 2 & 3, just need to refresh with current, and undo the changes (push & swap)
dyn->e = dyn->insts[ninst].e;
dyn->vector_sew = dyn->insts[ninst].vector_sew;
dyn->vector_sew = dyn->insts[ninst].vector_sew_exit;
#else
dyn->e = dyn->insts[reset_n].e;
dyn->vector_sew = dyn->insts[reset_n].vector_sew;
dyn->vector_sew = dyn->insts[reset_n].vector_sew_exit;
#endif
extcacheUnwind(&dyn->e);
#if STEP == 0
Expand Down
33 changes: 16 additions & 17 deletions src/dynarec/rv64/dynarec_rv64_pass0.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,24 @@
#define JUMP(A, C) add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C; dyn->insts[ninst].x64.jmp_insts = 0
#define BARRIER(A) if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1
#define SET_HASCALLRET() dyn->insts[ninst].x64.has_callret = 1
#define NEW_INST \
++dyn->size; \
memset(&dyn->insts[ninst], 0, sizeof(instruction_native_t)); \
dyn->insts[ninst].x64.addr = ip; \
dyn->e.combined1 = dyn->e.combined2 = 0; \
dyn->e.swapped = 0; \
dyn->e.barrier = 0; \
for (int i = 0; i < 16; ++i) \
dyn->e.olds[i].v = 0; \
dyn->insts[ninst].f_entry = dyn->f; \
if (reset_n == -1) \
dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \
if (ninst) \
#define NEW_INST \
++dyn->size; \
memset(&dyn->insts[ninst], 0, sizeof(instruction_native_t)); \
dyn->insts[ninst].x64.addr = ip; \
dyn->e.combined1 = dyn->e.combined2 = 0; \
dyn->e.swapped = 0; \
dyn->e.barrier = 0; \
for (int i = 0; i < 16; ++i) \
dyn->e.olds[i].v = 0; \
dyn->insts[ninst].f_entry = dyn->f; \
dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \
if (ninst) \
dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr;

#define INST_EPILOG \
dyn->insts[ninst].f_exit = dyn->f; \
dyn->insts[ninst].e = dyn->e; \
dyn->insts[ninst].vector_sew = dyn->vector_sew; \
#define INST_EPILOG \
dyn->insts[ninst].f_exit = dyn->f; \
dyn->insts[ninst].e = dyn->e; \
dyn->insts[ninst].vector_sew_exit = dyn->vector_sew; \
dyn->insts[ninst].x64.has_next = (ok > 0) ? 1 : 0;
#define INST_NAME(name)
#define DEFAULT \
Expand Down
22 changes: 11 additions & 11 deletions src/dynarec/rv64/dynarec_rv64_pass1.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@
#define FINI
#define MESSAGE(A, ...) do {} while (0)
#define EMIT(A) do {} while (0)
#define NEW_INST \
dyn->insts[ninst].f_entry = dyn->f; \
dyn->e.combined1 = dyn->e.combined2 = 0; \
for (int i = 0; i < 16; ++i) \
dyn->e.olds[i].v = 0; \
if (reset_n != -1) \
dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \
dyn->e.swapped = 0; \
#define NEW_INST \
dyn->insts[ninst].f_entry = dyn->f; \
dyn->e.combined1 = dyn->e.combined2 = 0; \
for (int i = 0; i < 16; ++i) \
dyn->e.olds[i].v = 0; \
dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \
dyn->e.swapped = 0; \
dyn->e.barrier = 0

#define INST_EPILOG \
dyn->insts[ninst].e = dyn->e; \
dyn->insts[ninst].f_exit = dyn->f
#define INST_EPILOG \
dyn->insts[ninst].e = dyn->e; \
dyn->insts[ninst].f_exit = dyn->f; \
dyn->insts[ninst].vector_sew_exit = dyn->vector_sew;

#define INST_NAME(name)
2 changes: 0 additions & 2 deletions src/dynarec/rv64/dynarec_rv64_pass2.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
#define MESSAGE(A, ...) do {} while (0)
#define EMIT(A) do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0)
#define NEW_INST \
if (reset_n != -1) \
dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \
if (ninst) { \
dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size); \
dyn->insts_size += 1 + ((dyn->insts[ninst - 1].x64.size > (dyn->insts[ninst - 1].size / 4)) ? dyn->insts[ninst - 1].x64.size : (dyn->insts[ninst - 1].size / 4)) / 15; \
Expand Down
2 changes: 0 additions & 2 deletions src/dynarec/rv64/dynarec_rv64_pass3.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@

#define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
#define NEW_INST \
if (reset_n != -1) \
dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \
if (box64_dynarec_dump) print_newinst(dyn, ninst); \
if (ninst) { \
addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst - 1].x64.size, dyn->insts[ninst - 1].size / 4); \
Expand Down
9 changes: 5 additions & 4 deletions src/dynarec/rv64/dynarec_rv64_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,14 @@ typedef struct instruction_rv64_s {
uint16_t ymm0_in; // bitmap of ymm to zero at purge
uint16_t ymm0_add; // the ymm0 added by the opcode
uint16_t ymm0_sub; // the ymm0 removed by the opcode
uint16_t ymm0_out; // the ymmm0 at th end of the opcode
uint16_t ymm0_out; // the ymm0 at the end of the opcode
uint16_t ymm0_pass2, ymm0_pass3;
int barrier_maybe;
flagcache_t f_exit; // flags status at end of instruction
extcache_t e; // extcache at end of instruction (but before popping)
flagcache_t f_entry; // flags status before the instruction begins
uint8_t vector_sew;
uint8_t vector_sew_entry; // sew status before the instruction begins
uint8_t vector_sew_exit; // sew status at the end of instruction
} instruction_rv64_t;

typedef struct dynarec_rv64_s {
Expand Down Expand Up @@ -153,8 +154,8 @@ typedef struct dynarec_rv64_s {
uint16_t ymm_zero; // bitmap of ymm to zero at purge
uint8_t always_test;
uint8_t abort;
uint8_t vector_sew;
uint8_t vector_eew; // effective element width
uint8_t vector_sew; // current sew status
uint8_t vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH
} dynarec_rv64_t;

// v0 is hardware wired to vector mask register, which should be always reserved
Expand Down