Skip to content

Commit

Permalink
Implement fake hot/cold splitting and corresponding stress mode (#69763)
Browse files Browse the repository at this point in the history
* Implement fake code splitting in the JIT so splitting can be tested without VM support

* Implement stress mode for hot/cold splitting

The stress mode splits a method right after its first basic block,
provided the method has more than one block (methods with EH are
skipped). This change also includes the following fixes:
- Loop alignment is disabled for cold blocks, as moving blocks
into the cold section may invalidate the initial decision to align.
- Long jumps are no longer reduced to short jumps if crossing
hot/cold sections.

Co-authored-by: Aman Khalid <t-amankhalid@microsoft.com>
  • Loading branch information
amanasifkhalid and Aman Khalid authored May 27, 2022
1 parent d7b9fce commit 70fd5dc
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 41 deletions.
12 changes: 10 additions & 2 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3186,14 +3186,18 @@ void Compiler::compInitOptions(JitFlags* jitFlags)

opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC);

bool enableFakeSplitting = false;

#ifdef DEBUG
enableFakeSplitting = JitConfig.JitFakeProcedureSplitting();

#if defined(TARGET_XARCH)
// Whether encoding of absolute addr as PC-rel offset is enabled
opts.compEnablePCRelAddr = (JitConfig.EnablePCRelAddr() != 0);
#endif
#endif // DEBUG

opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT);
opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || enableFakeSplitting;

#ifdef TARGET_ARM64
// TODO-ARM64-NYI: enable hot/cold splitting
Expand All @@ -3207,7 +3211,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
if (opts.compProcedureSplitting)
{
// Note that opts.compDbgCode is true under ngen for checked assemblies!
opts.compProcedureSplitting = !opts.compDbgCode;
opts.compProcedureSplitting = !opts.compDbgCode || enableFakeSplitting;

#ifdef DEBUG
// JitForceProcedureSplitting is used to force procedure splitting on checked assemblies.
Expand Down Expand Up @@ -3236,6 +3240,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
}

#ifdef DEBUG

// Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK
if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30))
{
Expand Down Expand Up @@ -5185,6 +5190,9 @@ void Compiler::placeLoopAlignInstructions()

if ((block->bbNext != nullptr) && (block->bbNext->isLoopAlign()))
{
// Loop alignment is disabled for cold blocks
assert((block->bbFlags & BBF_COLD) == 0);

// If jmp was not found, then block before the loop start is where align instruction will be added.
if (bbHavingAlign == nullptr)
{
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7607,6 +7607,8 @@ class Compiler

// ICorJitInfo wrappers

void eeAllocMem(AllocMemArgs* args);

void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize);

void eeAllocUnwindInfo(BYTE* pHotCode,
Expand Down
45 changes: 45 additions & 0 deletions src/coreclr/jit/ee_il_dll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,37 @@ void Compiler::eeDispLineInfos()
* (e.g., host AMD64, target ARM64), then VM will get confused anyway.
*/

//------------------------------------------------------------------------
// eeAllocMem: Wrapper around the ICorJitInfo allocMem call.
//
// Arguments:
//    args - the allocation request; it is forwarded to allocMem unchanged
//           unless fake splitting applies (see Notes).
//
// Notes:
//    In DEBUG builds, when JitFakeProcedureSplitting is set and a non-empty
//    cold section was requested, only a single hot region is requested, laid out as
//        [hot code][4K padding][cold code]
//    and the cold code pointers are then pointed into that region. The
//    hot/cold sizes in 'args' are restored to the caller's original values
//    before returning.
//
void Compiler::eeAllocMem(AllocMemArgs* args)
{
#ifdef DEBUG
    // Remember what the caller asked for; the request may be rewritten below.
    const UNATIVE_OFFSET requestedHotSize  = args->hotCodeSize;
    const UNATIVE_OFFSET requestedColdSize = args->coldCodeSize;
    const UNATIVE_OFFSET paddingSize       = 4096;

    const bool fakeSplit = JitConfig.JitFakeProcedureSplitting() && (requestedColdSize > 0);

    if (fakeSplit)
    {
        // Fold the cold request (plus padding) into the hot request.
        args->coldCodeSize = 0;
        args->hotCodeSize  = requestedHotSize + paddingSize + requestedColdSize;
    }
#endif // DEBUG

    info.compCompHnd->allocMem(args);

#ifdef DEBUG
    if (!fakeSplit)
    {
        return;
    }

    // The cold section starts right after the hot code and the padding.
    BYTE* const hotBase   = (BYTE*)args->hotCodeBlock;
    BYTE* const hotBaseRW = (BYTE*)args->hotCodeBlockRW;

    args->coldCodeBlock   = hotBase + requestedHotSize + paddingSize;
    args->coldCodeBlockRW = hotBaseRW + requestedHotSize + paddingSize;

    // Put the original sizes back in case the caller reads them later.
    args->hotCodeSize  = requestedHotSize;
    args->coldCodeSize = requestedColdSize;
#endif // DEBUG
}

void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize)
{
#ifdef DEBUG
Expand All @@ -1130,6 +1161,13 @@ void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwind
printf("reserveUnwindInfo(isFunclet=%s, isColdCode=%s, unwindSize=0x%x)\n", isFunclet ? "true" : "false",
isColdCode ? "true" : "false", unwindSize);
}

// Fake splitting currently does not handle unwind info for cold code
if (isColdCode && JitConfig.JitFakeProcedureSplitting())
{
JITDUMP("reserveUnwindInfo for cold code with JitFakeProcedureSplitting enabled: ignoring cold unwind info\n");
return;
}
#endif // DEBUG

if (info.compMatchedVM)
Expand Down Expand Up @@ -1169,6 +1207,13 @@ void Compiler::eeAllocUnwindInfo(BYTE* pHotCode,
}
printf(")\n");
}

// Fake splitting currently does not handle unwind info for cold code
if (pColdCode && JitConfig.JitFakeProcedureSplitting())
{
JITDUMP("allocUnwindInfo for cold code with JitFakeProcedureSplitting enabled: ignoring cold unwind info\n");
return;
}
#endif // DEBUG

if (info.compMatchedVM)
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6065,7 +6065,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
args.xcptnsCount = xcptnsCount;
args.flag = allocMemFlag;

emitCmpHandle->allocMem(&args);
emitComp->eeAllocMem(&args);

codeBlock = (BYTE*)args.hotCodeBlock;
codeBlockRW = (BYTE*)args.hotCodeBlockRW;
Expand All @@ -6083,7 +6083,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
args.xcptnsCount = xcptnsCount;
args.flag = allocMemFlag;

emitCmpHandle->allocMem(&args);
emitComp->eeAllocMem(&args);

codeBlock = (BYTE*)args.hotCodeBlock;
codeBlockRW = (BYTE*)args.hotCodeBlockRW;
Expand Down Expand Up @@ -6337,6 +6337,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
assert(coldCodeBlock);
cp = coldCodeBlock;
writeableOffset = coldCodeBlockRW - coldCodeBlock;
emitOffsAdj = 0;
#ifdef DEBUG
if (emitComp->opts.disAsm || emitComp->verbose)
{
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/fgopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5963,9 +5963,10 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication)
(bNext != nullptr) && // block is not the last block
(bNext->bbRefs == 1) && // No other block jumps to bNext
(bNext->bbJumpKind == BBJ_ALWAYS) && // The next block is a BBJ_ALWAYS block
bNext->isEmpty() && // and it is an an empty block
bNext->isEmpty() && // and it is an empty block
(bNext != bNext->bbJumpDest) && // special case for self jumps
(bDest != fgFirstColdBlock))
(bDest != fgFirstColdBlock) &&
(!fgInDifferentRegions(block, bDest))) // do not cross hot/cold sections
{
// case (a)
//
Expand Down
87 changes: 52 additions & 35 deletions src/coreclr/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3410,49 +3410,64 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
BasicBlock* block;
BasicBlock* lblk;

for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
{
bool blockMustBeInHotSection = false;
bool forceSplit = false;

#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
if (bbIsHandlerBeg(block))
{
blockMustBeInHotSection = true;
}
#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
#ifdef DEBUG
// If stress-splitting, split right after the first block; don't handle functions with EH
forceSplit = JitConfig.JitStressProcedureSplitting() && (compHndBBtabCount == 0);
#endif

// Do we have a candidate for the first cold block?
if (firstColdBlock != nullptr)
if (forceSplit)
{
firstColdBlock = fgFirstBB->bbNext;
prevToFirstColdBlock = fgFirstBB;
}
else
{
for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
{
// We have a candidate for first cold block
bool blockMustBeInHotSection = false;

// Is this a hot block?
if (blockMustBeInHotSection || (block->isRunRarely() == false))
#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
if (bbIsHandlerBeg(block))
{
// We have to restart the search for the first cold block
firstColdBlock = nullptr;
prevToFirstColdBlock = nullptr;
blockMustBeInHotSection = true;
}
}
else // (firstColdBlock == NULL)
{
// We don't have a candidate for first cold block
#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION

// Is this a cold block?
if (!blockMustBeInHotSection && (block->isRunRarely() == true))
// Do we have a candidate for the first cold block?
if (firstColdBlock != nullptr)
{
//
// If the last block that was hot was a BBJ_COND
// then we will have to add an unconditional jump
// so the code size for block needs to be large
// enough to make it worth our while
//
if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8))
// We have a candidate for first cold block

// Is this a hot block?
if (blockMustBeInHotSection || (block->isRunRarely() == false))
{
// This block is now a candidate for first cold block
// Also remember the predecessor to this block
firstColdBlock = block;
prevToFirstColdBlock = lblk;
// We have to restart the search for the first cold block
firstColdBlock = nullptr;
prevToFirstColdBlock = nullptr;
}
}
else // (firstColdBlock == NULL)
{
// We don't have a candidate for first cold block

// Is this a cold block?
if (!blockMustBeInHotSection && (block->isRunRarely() == true))
{
//
// If the last block that was hot was a BBJ_COND
// then we will have to add an unconditional jump
// so the code size for block needs to be large
// enough to make it worth our while
//
if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8))
{
// This block is now a candidate for first cold block
// Also remember the predecessor to this block
firstColdBlock = block;
prevToFirstColdBlock = lblk;
}
}
}
}
Expand All @@ -3479,8 +3494,9 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
// then it may not be worth it to move it
// into the Cold section as a jump to the
// Cold section is 5 bytes in size.
// Ignore if stress-splitting.
//
if (firstColdBlock->bbNext == nullptr)
if (!forceSplit && firstColdBlock->bbNext == nullptr)
{
// If the size of the cold block is 7 or less
// then we will keep it in the Hot section.
Expand Down Expand Up @@ -3553,6 +3569,7 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock()
for (block = firstColdBlock; block != nullptr; block = block->bbNext)
{
block->bbFlags |= BBF_COLD;
block->unmarkLoopAlign(this DEBUG_ARG("Loop alignment disabled for cold blocks"));
}

EXIT:;
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ CONFIG_INTEGER(JitStressBiasedCSE, W("JitStressBiasedCSE"), 0x101) // Intern
// stress.
CONFIG_INTEGER(JitStressModeNamesOnly, W("JitStressModeNamesOnly"), 0) // Internal Jit stress: if nonzero, only enable
// stress modes listed in JitStressModeNames
CONFIG_INTEGER(JitStressProcedureSplitting, W("JitStressProcedureSplitting"), 0) // Always split after the first basic
// block. Skips functions with EH
// for simplicity.
CONFIG_INTEGER(JitStressRegs, W("JitStressRegs"), 0)
CONFIG_STRING(JitStressRegsRange, W("JitStressRegsRange")) // Only apply JitStressRegs to methods in this hash range

Expand Down Expand Up @@ -192,6 +195,12 @@ CONFIG_INTEGER(JitDumpAtOSROffset, W("JitDumpAtOSROffset"), -1) // Only dump OSR
CONFIG_INTEGER(JitDumpInlinePhases, W("JitDumpInlinePhases"), 1) // Dump inline compiler phases
CONFIG_METHODSET(JitEHDump, W("JitEHDump")) // Dump the EH table for the method, as reported to the VM
CONFIG_METHODSET(JitExclude, W("JitExclude"))
CONFIG_INTEGER(JitFakeProcedureSplitting, W("JitFakeProcedureSplitting"), 0) // Do code splitting independent of VM.
// For now, this disables unwind info for
// cold sections, breaking stack walks.
// Set COMPlus_GCgen0size=1000000 to avoid
// running the GC, which requires
// stack-walking.
CONFIG_METHODSET(JitForceProcedureSplitting, W("JitForceProcedureSplitting"))
CONFIG_METHODSET(JitGCDump, W("JitGCDump"))
CONFIG_METHODSET(JitDebugDump, W("JitDebugDump"))
Expand Down

0 comments on commit 70fd5dc

Please sign in to comment.