[RISC-V] Fix Shuffling Thunks part 2 #90707

Merged 3 commits on Aug 17, 2023

26 changes: 6 additions & 20 deletions src/coreclr/vm/riscv64/cgencpu.h
@@ -329,15 +329,7 @@ const IntReg RegRa = IntReg(1);

class StubLinkerCPU : public StubLinker
{

public:

// BitFlags for EmitLoadStoreReg(Pair)Imm methods
enum {
eSTORE = 0x0,
eLOAD = 0x1,
};

static void Init();
static bool isValidSimm12(int value) {
return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 );
@@ -352,28 +344,22 @@ class StubLinkerCPU : public StubLinker
void EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, struct ShuffleEntry *pShuffleEntryArray, void* extraArg);
#endif // FEATURE_SHARE_GENERIC_CODE

#ifdef _DEBUG
void EmitNop() { _ASSERTE(!"RISCV64:NYI "); }
#endif
void EmitBreakPoint() { _ASSERTE(!"RISCV64:NYI"); }
private:
void EmitMovConstant(IntReg target, UINT64 constant);
void EmitCmpImm(IntReg reg, int imm);
void EmitCmpReg(IntReg Xn, IntReg Xm);
void EmitCondFlagJump(CodeLabel * target, UINT cond);
void EmitJumpRegister(IntReg regTarget);
void EmitMovReg(IntReg dest, IntReg source);
void EmitMovReg(FloatReg dest, FloatReg source);

void EmitSubImm(IntReg Xd, IntReg Xn, unsigned int value);
void EmitAddImm(IntReg Xd, IntReg Xn, unsigned int value);
void EmitSllImm(IntReg Xd, IntReg Xn, unsigned int value);
void EmitLuImm(IntReg Xd, unsigned int value);

void EmitLoadStoreRegImm(DWORD flags, IntReg Xt, IntReg Xn, int offset=0);
void EmitLoadStoreRegImm(DWORD flags, FloatReg Ft, IntReg Xn, int offset=0);

void EmitLoadFloatRegImm(FloatReg ft, IntReg base, int offset);
void EmitLoad(IntReg dest, IntReg srcAddr, int offset = 0);
void EmitLoad(FloatReg dest, IntReg srcAddr, int offset = 0);
void EmitStore(IntReg src, IntReg destAddr, int offset = 0);
void EmitStore(FloatReg src, IntReg destAddr, int offset = 0);

void EmitCallRegister(IntReg reg);
void EmitRet(IntReg reg);

Review comment:
Probably you need to remove the EmitRet() declaration too, since its implementation was removed.

Contributor Author:
Good catch, will add to the next PR.

};

161 changes: 77 additions & 84 deletions src/coreclr/vm/riscv64/stubs.cpp
@@ -1081,96 +1081,92 @@ void StubLinkerCPU::EmitMovConstant(IntReg reg, UINT64 imm)
}
}

void StubLinkerCPU::EmitCmpImm(IntReg reg, int imm)
void StubLinkerCPU::EmitJumpRegister(IntReg regTarget)
{
_ASSERTE(!"RISCV64: not implementation on riscv64!!!");
Emit32(0x00000067 | (regTarget << 15));
}

void StubLinkerCPU::EmitCmpReg(IntReg Xn, IntReg Xm)
// Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
static unsigned ITypeInstr(unsigned opcode, unsigned funct3, unsigned rd, unsigned rs1, int imm12)

Review comment:
The *TypeInstr functions are very useful; I think they should be available in the JIT as well.

Contributor Author:
They can be moved. Since I'm fairly new to the codebase, what's a good place to put code shared between the VM and the JIT? utilcode?

There's some more redundancy between the JIT and VM emitter code, e.g. isValid*imm*(), that would be worth factoring out to a common place.

Review comment:
src/coreclr/utilcode/util.cpp looks more suitable.
@jkotas any thoughts?

Member:
The duplication of instruction encoding helpers is very small, and it is there on all platforms. I do not think it is worth fixing. The instruction encoding support in the JIT has to cover nearly all instructions, while the stublinker only needs to encode a few, so it can be very simple.

{
_ASSERTE(!"RISCV64: not implementation on riscv64!!!");
_ASSERTE(!(opcode >> 7));
_ASSERTE(!(funct3 >> 3));
_ASSERTE(!(rd >> 5));
_ASSERTE(!(rs1 >> 5));
_ASSERTE(StubLinkerCPU::isValidSimm12(imm12));
return opcode | (rd << 7) | (funct3 << 12) | (rs1 << 15) | (imm12 << 20);
}

void StubLinkerCPU::EmitCondFlagJump(CodeLabel * target, UINT cond)
static unsigned STypeInstr(unsigned opcode, unsigned funct3, unsigned rs1, unsigned rs2, int imm12)
{
_ASSERTE(!"RISCV64: not implementation on riscv64!!!");
_ASSERTE(!(opcode >> 7));
_ASSERTE(!(funct3 >> 3));
_ASSERTE(!(rs1 >> 5));
_ASSERTE(!(rs2 >> 5));
_ASSERTE(StubLinkerCPU::isValidSimm12(imm12));
int immLo5 = imm12 & 0x1f;
int immHi7 = (imm12 >> 5) & 0x7f;
return opcode | (immLo5 << 7) | (funct3 << 12) | (rs1 << 15) | (rs2 << 20) | (immHi7 << 25);
}

void StubLinkerCPU::EmitJumpRegister(IntReg regTarget)
static unsigned RTypeInstr(unsigned opcode, unsigned funct3, unsigned funct7, unsigned rd, unsigned rs1, unsigned rs2)
{
Emit32(0x00000067 | (regTarget << 15));
_ASSERTE(!(opcode >> 7));
_ASSERTE(!(funct3 >> 3));
_ASSERTE(!(funct7 >> 7));
_ASSERTE(!(rd >> 5));
_ASSERTE(!(rs1 >> 5));
_ASSERTE(!(rs2 >> 5));
return opcode | (rd << 7) | (funct3 << 12) | (rs1 << 15) | (rs2 << 20) | (funct7 << 25);
}
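
The three *TypeInstr helpers above pack fields exactly as the RV32/64G instruction listings lay them out. As a standalone sanity-check sketch (not part of this PR; the helper names and values below are hypothetical), the following re-implements the I-type and S-type packing and round-trips the fields to show the layout: opcode in bits 6:0, rd in 11:7, funct3 in 14:12, rs1 in 19:15, and the immediate in the upper bits, split into imm[4:0] and imm[11:5] for S-type.

// Standalone sanity-check sketch, not part of this PR; names are hypothetical.
#include <cassert>
#include <cstdint>

static uint32_t PackIType(uint32_t opcode, uint32_t funct3, uint32_t rd, uint32_t rs1, int32_t imm12)
{
    return opcode | (rd << 7) | (funct3 << 12) | (rs1 << 15) | ((uint32_t)imm12 << 20);
}

static uint32_t PackSType(uint32_t opcode, uint32_t funct3, uint32_t rs1, uint32_t rs2, int32_t imm12)
{
    uint32_t lo5 = imm12 & 0x1f;                   // imm[4:0]  -> bits 11:7
    uint32_t hi7 = ((uint32_t)imm12 >> 5) & 0x7f;  // imm[11:5] -> bits 31:25
    return opcode | (lo5 << 7) | (funct3 << 12) | (rs1 << 15) | (rs2 << 20) | (hi7 << 25);
}

int main()
{
    // ld t6, 88(a0): opcode 0x03, funct3 0x3, rd = x31 (t6), rs1 = x10 (a0)
    uint32_t ld = PackIType(0x03, 0x3, 31, 10, 88);
    assert((ld & 0x7f) == 0x03);           // opcode
    assert(((ld >> 7) & 0x1f) == 31);      // rd
    assert(((ld >> 12) & 0x7) == 0x3);     // funct3
    assert(((ld >> 15) & 0x1f) == 10);     // rs1
    assert(((int32_t)ld >> 20) == 88);     // imm12, sign-extended

    // sd t6, 16(sp): opcode 0x23, funct3 0x3, rs1 = x2 (sp), rs2 = x31 (t6)
    uint32_t sd = PackSType(0x23, 0x3, 2, 31, 16);
    uint32_t imm = ((sd >> 7) & 0x1f) | (((sd >> 25) & 0x7f) << 5);
    assert(imm == 16);
    assert(((sd >> 15) & 0x1f) == 2 && ((sd >> 20) & 0x1f) == 31);
    return 0;
}
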

void StubLinkerCPU::EmitRet(IntReg Xn)
void StubLinkerCPU::EmitLoad(IntReg dest, IntReg srcAddr, int offset)
{
Emit32((DWORD)(0x00000067 | (Xn << 15))); // jalr X0, 0(Xn)
Emit32(ITypeInstr(0x3, 0x3, dest, srcAddr, offset)); // ld
}

void StubLinkerCPU::EmitLoadStoreRegImm(DWORD flags, IntReg Xt, IntReg Xn, int offset)
void StubLinkerCPU::EmitLoad(FloatReg dest, IntReg srcAddr, int offset)
{
BOOL isLoad = flags & 1;
if (isLoad) {
// ld regNum, offset(Xn);
Emit32((DWORD)(0x00003003 | (Xt << 7) | (Xn << 15) | (offset << 20)));
} else {
// sd regNum, offset(Xn)
Emit32((DWORD)(0x00003023 | (Xt << 20) | (Xn << 15) | (offset & 0x1F) << 7 | (((offset >> 5) & 0x7F) << 25)));
}
Emit32(ITypeInstr(0x7, 0x3, dest, srcAddr, offset)); // fld
}

void StubLinkerCPU::EmitLoadStoreRegImm(DWORD flags, FloatReg Ft, IntReg Xn, int offset)
void StubLinkerCPU:: EmitStore(IntReg src, IntReg destAddr, int offset)
{
BOOL isLoad = flags & 1;
if (isLoad) {
// fld Ft, Xn, offset
Emit32((DWORD)(0x00003007 | (Xn << 15) | (Ft << 7) | (offset << 20)));
} else {
// fsd Ft, offset(Xn)
Emit32((WORD)(0x00003027 | (Xn << 15) | (Ft << 20) | (offset & 0xF) << 7 | ((offset >> 4) & 0xFF)));
}
Emit32(STypeInstr(0x23, 0x3, destAddr, src, offset)); // sd
}

void StubLinkerCPU::EmitLoadFloatRegImm(FloatReg ft, IntReg base, int offset)
void StubLinkerCPU::EmitStore(FloatReg src, IntReg destAddr, int offset)
{
// fld ft,base,offset
_ASSERTE(offset <= 2047 && offset >= -2048);
Emit32(0x2b800000 | (base.reg << 15) | ((offset & 0xfff)<<20) | (ft.reg << 7));
Emit32(STypeInstr(0x27, 0x3, destAddr, src, offset)); // fsd
}

void StubLinkerCPU::EmitMovReg(IntReg Xd, IntReg Xm)
{
Emit32(0x00000013 | (Xm << 15) | (Xd << 7));
EmitAddImm(Xd, Xm, 0);
}
void StubLinkerCPU::EmitMovReg(FloatReg dest, FloatReg source)
{
Emit32(RTypeInstr(0x53, 0, 0x11, dest, source, source)); // fsgnj.d
}

void StubLinkerCPU::EmitSubImm(IntReg Xd, IntReg Xn, unsigned int value)
{
_ASSERTE(value <= 0x800);
Emit32((DWORD)(0x00000013 | (((~value + 0x1) & 0xFFF) << 20) | (Xn << 15) | (Xd << 7))); // addi Xd, Xn, (~value + 0x1) & 0xFFF
EmitAddImm(Xd, Xn, ~value + 0x1);
}

void StubLinkerCPU::EmitAddImm(IntReg Xd, IntReg Xn, unsigned int value)
{
_ASSERTE(value <= 0x7FF);
Emit32((DWORD)(0x00000013 | (value << 20) | (Xn << 15) | (Xd << 7))); // addi Xd, Xn, value
Emit32(ITypeInstr(0x13, 0, Xd, Xn, value)); // addi
}

void StubLinkerCPU::EmitSllImm(IntReg Xd, IntReg Xn, unsigned int value)
{
_ASSERTE(value <= 0x3F);
Emit32((DWORD)(0x00001013 | (value << 20) | (Xn << 15) | (Xd << 7))); // sll Xd, Xn, value
_ASSERTE(!(value >> 6));
Emit32(ITypeInstr(0x13, 0x1, Xd, Xn, value)); // slli
}

void StubLinkerCPU::EmitLuImm(IntReg Xd, unsigned int value)
{
_ASSERTE(value <= 0xFFFFF);
Emit32((DWORD)(0x00000037 | (value << 12) | (Xd << 7))); // lui Xd, value
}

void StubLinkerCPU::EmitCallRegister(IntReg reg)
{
_ASSERTE(!"RISCV64: not implementation on riscv64!!!");
}

void StubLinkerCPU::Init()
{
new (gBranchIF) BranchInstructionFormat();
@@ -1179,12 +1175,13 @@ void StubLinkerCPU::Init()
// Emits code to adjust arguments for static delegate target.
VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
{
static const int argRegBase = 10; // first argument register: a0, fa0
static const IntReg t6 = 31, t5 = 30, a0 = argRegBase + 0;
// On entry a0 holds the delegate instance. Look up the real target address stored in the MethodPtrAux
// field and saved in t6. Tailcall to the target method after re-arranging the arguments
// ld t6, a0, offsetof(DelegateObject, _methodPtrAux)
EmitLoadStoreRegImm(eLOAD, IntReg(31)/*t6*/, IntReg(10)/*a0*/, DelegateObject::GetOffsetOfMethodPtrAux());
// addi t5, a0, DelegateObject::GetOffsetOfMethodPtrAux() - load the indirection cell into t5 used by ResolveWorkerAsmStub
EmitAddImm(30/*t5*/, 10/*a0*/, DelegateObject::GetOffsetOfMethodPtrAux());
EmitLoad(t6, a0, DelegateObject::GetOffsetOfMethodPtrAux());
// load the indirection cell into t5 used by ResolveWorkerAsmStub
EmitAddImm(t5, a0, DelegateObject::GetOffsetOfMethodPtrAux());

int delay_index[8] = {-1};
bool is_store = false;
@@ -1204,23 +1201,19 @@ VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)

if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
{
_ASSERTE(!"RISCV64: not validated on riscv64!!!");
// FirstFloatReg is 10;
int j = 10;
int j = 1;
while (pEntry[j].srcofs & ShuffleEntry::FPREGMASK)
{
j++;
}
assert((pEntry->dstofs - pEntry->srcofs) == index);
assert(8 > index);

int tmp_reg = 0; // f0.
int tmp_reg = 0; // ft0.
ShuffleEntry* tmp_entry = pShuffleEntryArray + delay_index[0];
while (index)
{
// fld(Ft, sp, offset);
_ASSERTE(isValidSimm12(tmp_entry->srcofs << 3));
Emit32(0x3007 | (tmp_reg << 15) | (2 << 7/*sp*/) | ((tmp_entry->srcofs << 3) << 20));
EmitLoad(FloatReg(tmp_reg), RegSp, tmp_entry->srcofs * sizeof(void*));
tmp_reg++;
index--;
tmp_entry++;
@@ -1231,72 +1224,72 @@ VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
i += j;
while (pEntry[j].srcofs & ShuffleEntry::FPREGMASK)
{
if (pEntry[j].dstofs & ShuffleEntry::FPREGMASK)// fsgnj.d fd, fs, fs
Emit32(0x22000053 | ((pEntry[j].dstofs & ShuffleEntry::OFSREGMASK) << 7) | ((pEntry[j].srcofs & ShuffleEntry::OFSREGMASK) << 15) | ((pEntry[j].srcofs & ShuffleEntry::OFSREGMASK) << 20));
else //// fsd(Ft, Rn, offset);
FloatReg src = (pEntry[j].srcofs & ShuffleEntry::OFSREGMASK) + argRegBase;
if (pEntry[j].dstofs & ShuffleEntry::FPREGMASK) {
FloatReg dst = (pEntry[j].dstofs & ShuffleEntry::OFSREGMASK) + argRegBase;
EmitMovReg(dst, src);
}
else
{
_ASSERTE(isValidSimm12((pEntry[j].dstofs * sizeof(long))));
Emit32(0x3027 | ((pEntry[j].srcofs & ShuffleEntry::OFSREGMASK) << 20) | (2 << 15 /*sp*/) | ((pEntry[j].dstofs * sizeof(long) & 0x1f) << 7) | ((pEntry[j].dstofs * sizeof(long) & 0x7f) << 25));
EmitStore(src, RegSp, pEntry[j].dstofs * sizeof(void*));
}
j--;
}
assert(tmp_reg <= 11);
/*
while (tmp_reg > 11)

assert(tmp_reg <= 7);
while (tmp_reg > 0)
{
tmp_reg--;
// fmov.d fd, fs
Emit32(0x01149800 | index | (tmp_reg << 5));
EmitMovReg(FloatReg(index + argRegBase), FloatReg(tmp_reg));
index++;
}
*/
index = 0;
pEntry = tmp_entry;
}
else
{
// 10 is the offset of FirstGenArgReg to FirstGenReg
assert(pEntry->dstofs & ShuffleEntry::REGMASK);
assert((pEntry->dstofs & ShuffleEntry::OFSMASK) < (pEntry->srcofs & ShuffleEntry::OFSMASK));
EmitMovReg(IntReg((pEntry->dstofs & ShuffleEntry::OFSMASK) + 10), IntReg((pEntry->srcofs & ShuffleEntry::OFSMASK) + 10));
IntReg dst = (pEntry->dstofs & ShuffleEntry::OFSREGMASK) + argRegBase;
IntReg src = (pEntry->srcofs & ShuffleEntry::OFSREGMASK) + argRegBase;
assert(dst < src);
EmitMovReg(dst, src);
}
}
else if (pEntry->dstofs & ShuffleEntry::REGMASK)
{
// source must be on the stack
_ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));

int dstReg = (pEntry->dstofs & ShuffleEntry::OFSREGMASK) + argRegBase;
int srcOfs = (pEntry->srcofs & ShuffleEntry::OFSMASK) * sizeof(void*);
if (pEntry->dstofs & ShuffleEntry::FPREGMASK)
{
if (!is_store)
{
delay_index[index++] = i;
continue;
}
EmitLoadFloatRegImm(FloatReg((pEntry->dstofs & ShuffleEntry::OFSREGMASK) + 10), RegSp, pEntry->srcofs * sizeof(void*));
EmitLoad(FloatReg(dstReg), RegSp, srcOfs);
}
else
{
assert(pEntry->dstofs & ShuffleEntry::REGMASK);
EmitLoadStoreRegImm(eLOAD, IntReg((pEntry->dstofs & ShuffleEntry::OFSMASK) + 10), RegSp, pEntry->srcofs * sizeof(void*));
EmitLoad(IntReg(dstReg), RegSp, srcOfs);
}
}
else
{
// source must be on the stack
// source & dest must be on the stack
_ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));

// dest must be on the stack
_ASSERTE(!(pEntry->dstofs & ShuffleEntry::REGMASK));

EmitLoadStoreRegImm(eLOAD, IntReg(29)/*t4*/, RegSp, pEntry->srcofs * sizeof(void*));
EmitLoadStoreRegImm(eSTORE, IntReg(29)/*t4*/, RegSp, pEntry->dstofs * sizeof(void*));
IntReg t4 = 29;
EmitLoad(t4, RegSp, pEntry->srcofs * sizeof(void*));
EmitStore(t4, RegSp, pEntry->dstofs * sizeof(void*));
}
}

// Tailcall to target
// jalr x0, 0(t6)
EmitJumpRegister(31);
EmitJumpRegister(t6);
}
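
The integer-register branch of the shuffle above moves every argument to a lower-numbered register (the assert that dst < src), so applying the moves in order can never clobber a source that a later entry still needs. A minimal standalone sketch of that invariant (not from this PR; the register contents and shuffle entries are hypothetical):

// Standalone sketch: why the downward integer-register shuffle is safe to apply in order.
#include <cassert>

int main()
{
    // Hypothetical contents of a0..a3 on entry: a0 = delegate "this", a1..a3 = args 1..3.
    int a[4] = { /*delegate*/ 100, /*arg1*/ 1, /*arg2*/ 2, /*arg3*/ 3 };

    // Hypothetical shuffle entries for a 3-argument static target: a1->a0, a2->a1, a3->a2.
    const int src[] = { 1, 2, 3 };
    const int dst[] = { 0, 1, 2 };

    for (int i = 0; i < 3; i++)
    {
        assert(dst[i] < src[i]);   // mirrors the assert in EmitShuffleThunk
        a[dst[i]] = a[src[i]];     // what the emitted "addi dst, src, 0" does
    }

    assert(a[0] == 1 && a[1] == 2 && a[2] == 3); // arguments shifted down by one register
    return 0;
}
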

// Emits code to adjust arguments for static delegate target.
@@ -1334,7 +1327,7 @@ VOID StubLinkerCPU::EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, s
// Unboxing stub case
// Fill param arg with methodtable of this pointer
// ld regHidden, a0, 0
EmitLoadStoreRegImm(eLOAD, IntReg(regHidden), IntReg(10), 0);
EmitLoad(IntReg(regHidden), IntReg(10));
}
}
else
1 change: 0 additions & 1 deletion src/tests/Common/scripts/bringup_runtest.sh
@@ -1276,7 +1276,6 @@ else
fi

scriptPath=$(dirname $0)
${scriptPath}/setup-stress-dependencies.sh --arch=$ARCH --outputDir=$coreOverlayDir
Member:
Why do you remove this part of the script?

Contributor Author:
Because setup-stress-dependencies.sh has been removed; details are in the commit message.

Member:
I see. The script is removed.


export __TestEnv=$testEnv
