Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Implement stack probing using helpers on (win|linux)-(x86|x64) (#26807)
Browse files Browse the repository at this point in the history
* Implement JIT_StackProbe (CORINFO_HELP_STACK_PROBE) helper in assembly on win-x64, win-x86, linux-x64, linux-x86 in src/vm/amd64 and src/vm/i386

* Insert a call to CORINFO_HELP_STACK_PROBE helper instead of inlining a stack probing loop in src/jit/codegenxarch.cpp

* Add READYTORUN_HELPER_StackProbe and its mapping to CORINFO_HELP_STACK_PROBE in src/inc/readytorun.h and src/inc/readytorunhelpers.h

* Update crossgen2 JitInterface and JITEEVersionIdentifier in src/inc/corinfo.h and src/tools/crossgen2/jitinterface/jitwrapper.cpp 

* Add test templates (T4) for stack probing tests in tests/src/JIT/Regression/JitBlue/GitHub_21061/GitHub_21061_StackOverflowIn(Function|Funclet)Prolog.tt
  • Loading branch information
echesakov authored Oct 7, 2019
1 parent 58084a2 commit 2ca2b2b
Show file tree
Hide file tree
Showing 15 changed files with 466 additions and 134 deletions.
12 changes: 7 additions & 5 deletions src/inc/corinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,11 @@ TODO: Talk about initializing structures before use
#endif
#endif

SELECTANY const GUID JITEEVersionIdentifier = { /* e2ae5b32-a9ab-426e-bc2a-ae1a883e0367 */
0xe2ae5b32,
0xa9ab,
0x426e,
{0xbc, 0x2a, 0xae, 0x1a, 0x88, 0x3e, 0x03, 0x67}
SELECTANY const GUID JITEEVersionIdentifier = { /* 1ce51eeb-dfd0-4450-ba2c-ea0d2d863df5 */
0x1ce51eeb,
0xdfd0,
0x4450,
{0xba, 0x2c, 0xea, 0x0d, 0x2d, 0x86, 0x3d, 0xf5}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -660,6 +660,8 @@ enum CorInfoHelpFunc

CORINFO_HELP_GVMLOOKUP_FOR_SLOT, // Resolve a generic virtual method target from this pointer and runtime method handle

CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame

CORINFO_HELP_COUNT,
};

Expand Down
6 changes: 6 additions & 0 deletions src/inc/jithelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,12 @@

JITHELPER(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB)

// Stack probe helper registration.
// On x86/AMD64 targets the helper id is bound to the JIT_StackProbe routine;
// on every other target the entry is registered without an implementation
// (NULL, CORINFO_HELP_SIG_UNDEF).
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
JITHELPER(CORINFO_HELP_STACK_PROBE, JIT_StackProbe, CORINFO_HELP_SIG_REG_ONLY)
#else // !_TARGET_X86_ && !_TARGET_AMD64_
JITHELPER(CORINFO_HELP_STACK_PROBE, NULL, CORINFO_HELP_SIG_UNDEF)
#endif // !_TARGET_X86_ && !_TARGET_AMD64_

#undef JITHELPER
#undef DYNAMICJITHELPER
#undef JITHELPER
Expand Down
3 changes: 3 additions & 0 deletions src/inc/readytorun.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,9 @@ enum ReadyToRunHelper

// JIT32 x86-specific exception handling
READYTORUN_HELPER_EndCatch = 0x110,

// Stack probing helper
READYTORUN_HELPER_StackProbe = 0x111,
};

//
Expand Down
4 changes: 4 additions & 0 deletions src/inc/readytorunhelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,5 +117,9 @@ HELPER(READYTORUN_HELPER_PInvokeEnd, CORINFO_HELP_JIT_PINVOKE_END
HELPER(READYTORUN_HELPER_MonitorEnter, CORINFO_HELP_MON_ENTER, )
HELPER(READYTORUN_HELPER_MonitorExit, CORINFO_HELP_MON_EXIT, )

// Map the ReadyToRun stack probe helper to CORINFO_HELP_STACK_PROBE.
// The mapping exists only when targeting x86 or AMD64; no mapping is emitted for other targets.
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
HELPER(READYTORUN_HELPER_StackProbe, CORINFO_HELP_STACK_PROBE, )
#endif

#undef HELPER
#undef OPTIMIZEFORSPEED
159 changes: 36 additions & 123 deletions src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2263,143 +2263,56 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
// Frame size >= 0x3000
assert(frameSize >= compiler->getVeryLargeFrameSize());

// Emit the following sequence to 'tickle' the pages.
// Note it is important that stack pointer not change until this is
// complete since the tickles could cause a stack overflow, and we
// need to be able to crawl the stack afterward (which means the
// stack pointer needs to be known).
#ifdef _TARGET_X86_
int spOffset = -(int)frameSize;

bool pushedStubParam = false;
if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg))
if (compiler->info.compPublishStubParam)
{
// push register containing the StubParam
inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
pushedStubParam = true;
GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, REG_SECRET_STUB_PARAM);
spOffset += REGSIZE_BYTES;
}

#ifndef _TARGET_UNIX_
instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
#endif
GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, spOffset);
regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);

//
// Can't have a label inside the ReJIT padding area
//
// Can't have a call until we have enough padding for ReJit.
genPrologPadForReJit();
genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN);

#ifndef _TARGET_UNIX_
// Code size for each instruction. We need this because the
// backward branch is hard-coded with the number of bytes to branch.
// The encoding differs based on the architecture and what register is
// used (namely, using RAX has a smaller encoding).
//
// xor eax,eax
// loop:
// For x86
// test [esp + eax], eax 3
// sub eax, 0x1000 5
// cmp EAX, -frameSize 5
// jge loop 2
//
// For AMD64 using RAX
// test [rsp + rax], rax 4
// sub rax, 0x1000 6
// cmp rax, -frameSize 6
// jge loop 2
//
// For AMD64 using RBP
// test [rsp + rbp], rbp 4
// sub rbp, 0x1000 7
// cmp rbp, -frameSize 7
// jge loop 2

GetEmitter()->emitIns_R_ARR(INS_test, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0);
inst_RV_IV(INS_sub, initReg, pageSize, EA_PTRSIZE);
inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE);

int bytesForBackwardJump;
#ifdef _TARGET_AMD64_
assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20);
#else // !_TARGET_AMD64_
assert(initReg == REG_EAX);
bytesForBackwardJump = -15;
#endif // !_TARGET_AMD64_

// Branch backwards to start of loop
inst_IV(INS_jge, bytesForBackwardJump);

lastTouchDelta = frameSize % pageSize;

#else // _TARGET_UNIX_

// Code size for each instruction. We need this because the
// backward branch is hard-coded with the number of bytes to branch.
// The encoding differs based on the architecture and what register is
// used (namely, using RAX has a smaller encoding).
//
// For x86
// lea eax, [esp - frameSize]
// loop:
// lea esp, [esp - pageSize] 7
// test [esp], eax 3
// cmp esp, eax 2
// jge loop 2
// lea rsp, [rbp + frameSize]
//
// For AMD64 using RAX
// lea rax, [rsp - frameSize]
// loop:
// lea rsp, [rsp - pageSize] 8
// test [rsp], rax 4
// cmp rsp, rax 3
// jge loop 2
// lea rsp, [rax + frameSize]
//
// For AMD64 using RBP
// lea rbp, [rsp - frameSize]
// loop:
// lea rsp, [rsp - pageSize] 8
// test [rsp], rbp 4
// cmp rsp, rbp 3
// jge loop 2
// lea rsp, [rbp + frameSize]

int sPageSize = (int)pageSize;

GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, initReg, REG_SPBASE, -((ssize_t)frameSize)); // get frame border

GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -sPageSize);
GetEmitter()->emitIns_R_AR(INS_test, EA_PTRSIZE, initReg, REG_SPBASE, 0);
inst_RV_RV(INS_cmp, REG_SPBASE, initReg);

int bytesForBackwardJump;
#ifdef _TARGET_AMD64_
assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
bytesForBackwardJump = -17;
#else // !_TARGET_AMD64_
assert(initReg == REG_EAX);
bytesForBackwardJump = -14;
#endif // !_TARGET_AMD64_
if (compiler->info.compPublishStubParam)
{
GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, REG_SECRET_STUB_PARAM);
GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, frameSize);
}
else
{
GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG);
}
#else // !_TARGET_X86_
static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) ==
RBM_NONE);

inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop
GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, -(int)frameSize);
regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);

GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, initReg, frameSize); // restore stack pointer
// Can't have a call until we have enough padding for ReJit.
genPrologPadForReJit();
genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN);

lastTouchDelta = 0; // The loop code above actually over-probes: it always probes beyond the final SP we need.
if (initReg == REG_DEFAULT_HELPER_CALL_TARGET)
{
*pInitRegZeroed = false;
}

#endif // _TARGET_UNIX_
static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RBM_NONE);

*pInitRegZeroed = false; // The initReg does not contain zero
GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG);
#endif // !_TARGET_X86_

if (pushedStubParam)
if (initReg == REG_STACK_PROBE_HELPER_ARG)
{
// pop eax
inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
regSet.verifyRegUsed(REG_SECRET_STUB_PARAM);
*pInitRegZeroed = false;
}

// sub esp, frameSize 6
inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
}

if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
Expand Down Expand Up @@ -3333,7 +3246,7 @@ unsigned CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree*
#ifdef _TARGET_X86_
instruction longMovIns = INS_movq;
#else // !_TARGET_X86_
instruction longMovIns = INS_mov;
instruction longMovIns = INS_mov;
#endif // !_TARGET_X86_
if ((size & 8) != 0)
{
Expand Down
14 changes: 14 additions & 0 deletions src/jit/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,11 @@ typedef unsigned char regNumberSmall;
// on the stack guard page, and must be touched before any further "SUB SP".
#define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES ARG_STACK_PROBE_THRESHOLD_BYTES

// Register (and its mask) that the JIT loads with the target stack address
// before calling CORINFO_HELP_STACK_PROBE.
#define REG_STACK_PROBE_HELPER_ARG REG_EAX
#define RBM_STACK_PROBE_HELPER_ARG RBM_EAX

// Mask of registers, other than the argument register, that the stack probe helper
// is presumably allowed to clobber (none for this target) - confirm against the helper implementation.
#define RBM_STACK_PROBE_HELPER_TRASH RBM_NONE

#elif defined(_TARGET_AMD64_)
// TODO-AMD64-CQ: Fine tune the following xxBlk threshold values:

Expand Down Expand Up @@ -896,6 +901,15 @@ typedef unsigned char regNumberSmall;
// AMD64 uses FEATURE_FIXED_OUT_ARGS so this can be zero.
#define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 0

// Register (and its mask) that the JIT loads with the target stack address
// before calling CORINFO_HELP_STACK_PROBE.
#define REG_STACK_PROBE_HELPER_ARG REG_R11
#define RBM_STACK_PROBE_HELPER_ARG RBM_R11

// Mask of registers, other than the argument register, that the stack probe helper does not preserve:
// nothing when targeting Unix, RAX otherwise.
#ifdef _TARGET_UNIX_
#define RBM_STACK_PROBE_HELPER_TRASH RBM_NONE
#else // !_TARGET_UNIX_
#define RBM_STACK_PROBE_HELPER_TRASH RBM_RAX
#endif // !_TARGET_UNIX_

#elif defined(_TARGET_ARM_)

// TODO-ARM-CQ: Use shift for division by power of 2
Expand Down
2 changes: 2 additions & 0 deletions src/tools/crossgen2/Common/JitInterface/CorInfoHelpFunc.cs
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,8 @@ which is the right helper to use to allocate an object of a given type. */

CORINFO_HELP_GVMLOOKUP_FOR_SLOT, // Resolve a generic virtual method target from this pointer and runtime method handle

CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame

CORINFO_HELP_COUNT,
}
}
10 changes: 5 additions & 5 deletions src/tools/crossgen2/jitinterface/jitwrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ class CORJIT_FLAGS
unsigned __int64 corJitFlags;
};

static const GUID JITEEVersionIdentifier = { /* d609bed1-7831-49fc-bd49-b6f054dd4d46 */
0xe2ae5b32,
0xa9ab,
0x426e,
{0xbc, 0x2a, 0xae, 0x1a, 0x88, 0x3e, 0x03, 0x67}
static const GUID JITEEVersionIdentifier = { /* 1ce51eeb-dfd0-4450-ba2c-ea0d2d863df5 */
0x1ce51eeb,
0xdfd0,
0x4450,
{0xba, 0x2c, 0xea, 0x0d, 0x2d, 0x86, 0x3d, 0xf5}
};

class Jit
Expand Down
34 changes: 33 additions & 1 deletion src/vm/amd64/JitHelpers_Fast.asm
Original file line number Diff line number Diff line change
Expand Up @@ -955,5 +955,37 @@ endif ; _DEBUG

NESTED_END TailCallHelperStub, _TEXT

end
; The following helper accesses ("probes") a location on each page of the stack,
; starting with the page right beneath the one rsp points into and going down to the page pointed to by r11.
; The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame.
; The JIT emits a call to the helper in the function/funclet prolog when a large (larger than 0x3000 bytes) stack frame is required.
;
; NOTE: this helper does NOT modify the value of rsp, so it can be defined as a leaf function.

PAGE_SIZE equ 1000h

LEAF_ENTRY JIT_StackProbe, _TEXT
; On entry:
; r11 - points to the lowest address on the stack frame being allocated (i.e. [InitialSp - FrameSize])
; rsp - points to some byte on the last probed page
; On exit:
; rax - is not preserved
; r11 - is preserved
;
; NOTE: this helper will probe at least one page below the one pointed to by rsp.

lea rax, [rsp - PAGE_SIZE] ; rax points to some byte on the first unprobed page
or rax, (PAGE_SIZE - 1) ; rax points to the **highest address** on the first unprobed page
; Rounding up to the last byte of the page makes the following loop end condition simpler:
; a page still has to be probed exactly when its highest address is >= r11.

ProbeLoop:
test dword ptr [rax], eax ; Read-only access that touches the page; only the flags are changed.
; The 4-byte read starts at the last byte of the page, so its upper 3 bytes
; fall on the adjacent higher page (the page probed just before this one).
sub rax, PAGE_SIZE ; rax points to the highest address of the **next page** to probe
cmp rax, r11
jge ProbeLoop ; if (rax >= r11), then we need to probe the page pointed to by rax (jge is a signed comparison).

ret

LEAF_END JIT_StackProbe, _TEXT

end
42 changes: 42 additions & 0 deletions src/vm/amd64/jithelpers_fast.S
Original file line number Diff line number Diff line change
Expand Up @@ -537,3 +537,45 @@ LEAF_ENTRY JIT_Stelem_Ref__ArrayStoreCheck_Helper, _TEXT
jmp C_FUNC(JIT_WriteBarrier)
#endif
LEAF_END JIT_Stelem_Ref__ArrayStoreCheck_Helper, _TEXT

// The following helper accesses ("probes") a location on each page of the stack,
// starting with the page right beneath the one rsp points into and going down to the page pointed to by r11.
// The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame.
// The JIT emits a call to the helper in the function/funclet prolog when a large (larger than 0x3000 bytes) stack frame is required.
//
// NOTE: On Linux we must advance the stack pointer as we probe - accessing memory 65535 bytes below rsp is not allowed.
// Since this helper modifies the value of rsp, it must establish a frame pointer.
//
// See also https://github.com/dotnet/coreclr/issues/16827#issue-303331518 for more information.

#define PAGE_SIZE 0x1000

NESTED_ENTRY JIT_StackProbe, _TEXT, NoHandler
// On entry:
// r11 - points to the lowest address on the stack frame being allocated (i.e. [InitialSp - FrameSize])
// rsp - points to some byte on the last probed page
// On exit:
// r11 - is preserved
//
// NOTE: this helper will probe at least one page below the one pointed to by rsp.

// Prolog: save the caller's rbp and use rbp as the frame pointer, so that rsp can be moved freely below.
push_nonvol_reg rbp
mov rbp, rsp
set_cfa_register rbp, 16

END_PROLOGUE

sub rsp, PAGE_SIZE // rsp points to some byte on the first unprobed page
or rsp, (PAGE_SIZE - 1) // rsp points to the **highest address** on the first unprobed page
// Rounding up to the last byte of the page makes the following loop end condition simpler:
// a page still has to be probed exactly when its highest address is >= r11.

LOCAL_LABEL(ProbeLoop):
test dword ptr [rsp], eax // Read-only access that touches the page; eax is only read and only the flags are changed.
// The 4-byte read starts at the last byte of the page, so its upper 3 bytes
// fall on the adjacent higher page (the page probed just before this one).
sub rsp, PAGE_SIZE // rsp points to the highest address of the **next page** to probe
cmp rsp, r11
jge LOCAL_LABEL(ProbeLoop) // if (rsp >= r11), then we need to probe the page pointed to by rsp (jge is a signed comparison).

// Epilog: presumably restores rsp from rbp and pops the saved rbp - confirm against the RESET_FRAME_WITH_RBP macro definition.
RESET_FRAME_WITH_RBP
ret

NESTED_END JIT_StackProbe, _TEXT
Loading

0 comments on commit 2ca2b2b

Please sign in to comment.