From 11f24dd3b68da68dbbbd222dac98768f6893f30c Mon Sep 17 00:00:00 2001 From: Wraith Date: Tue, 7 Jun 2022 20:36:45 +0100 Subject: [PATCH] JIT: Remove unneeded unconditional jumps (#69041) * resolve merge conflict * add checks for post-call jmp removal * use emitIsLastInsCall() to preclude call,jmp pairs and simplfy emitRemoveJumpToNextInst check for aligned instruction groups and skip them to preserve alignment linearize emitRemoveJumpToNextInst, add safety asserts for ordering and comment the requirement, add debug output explaining decisions * address feedback * change instruction omit to use code size zero * address feedback * add assertion info dump * address feedback * address feedback * remove end loop cleanup. cleanup jitdump messages and add early out flag. * address feedback, formatting * update variable naming and clarify comments * more feedback * add last instruction check and group flag set in emitSavIG * more feedback * fix jitdump output and make removal candidate printing optional * remove emitDispJumpList parameter * change overall size calculation and allow 0 size align to be printed in dumps * fix invorrect jmp check, remove inline for emitInstHasNoCode * allow alignment printing even when zero * remove debug printf and fix x86 * address feedback --- src/coreclr/jit/codegen.h | 5 +- src/coreclr/jit/codegenarmarch.cpp | 12 ++ src/coreclr/jit/codegencommon.cpp | 5 +- src/coreclr/jit/codegenlinear.cpp | 19 ++- src/coreclr/jit/codegenxarch.cpp | 24 +++ src/coreclr/jit/emit.cpp | 257 ++++++++++++++++++++++++++++- src/coreclr/jit/emit.h | 32 +++- src/coreclr/jit/emitarm.h | 6 + src/coreclr/jit/emitarm64.h | 6 + src/coreclr/jit/emitpub.h | 6 - src/coreclr/jit/emitxarch.cpp | 94 ++++++++--- src/coreclr/jit/emitxarch.h | 6 + src/coreclr/jit/instr.cpp | 26 --- 13 files changed, 429 insertions(+), 69 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index d567ef1537a37..8646d9191edcf 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1472,8 +1472,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX public: void instGen(instruction ins); - +#if defined(TARGET_XARCH) + void inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock, bool isRemovableJmpCandidate = false); +#else void inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock); +#endif void inst_SET(emitJumpKind condition, regNumber reg); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index a706893053fd5..f5de40823259e 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4288,6 +4288,18 @@ void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstRe #endif } +//------------------------------------------------------------------------ +// inst_JMP: Generate a jump instruction. +// +void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock) +{ +#if !FEATURE_FIXED_OUT_ARGS + assert((tgtBlock->bbTgtStkDepth * sizeof(int) == genStackLevel) || isFramePointerUsed()); +#endif // !FEATURE_FIXED_OUT_ARGS + + GetEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmp), tgtBlock); +} + //------------------------------------------------------------------------ // genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node. // diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index b755578022986..ed16b9a9fa498 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1944,11 +1944,12 @@ void CodeGen::genGenerateMachineCode() #endif // DEBUG /* We can now generate the function prolog and epilog */ - genGeneratePrologsAndEpilogs(); - /* Bind jump distances */ + // check to see if any jumps can be removed + GetEmitter()->emitRemoveJumpToNextInst(); + /* Bind jump distances */ GetEmitter()->emitJumpDistBind(); #if FEATURE_LOOP_ALIGN diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index d89217cdcdab4..6a44f1db324de 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -753,7 +753,24 @@ void CodeGen::genCodeForBBlist() break; case BBJ_ALWAYS: - inst_JMP(EJ_jmp, block->bbJumpDest); + inst_JMP(EJ_jmp, block->bbJumpDest +#ifdef TARGET_AMD64 + // AMD64 requires an instruction after a call instruction for unwinding + // inside an EH region so if the last instruction generated was a call instruction + // do not allow this jump to be marked for possible later removal. + // + // If a block was selected to place an alignment instruction because it ended + // with a jump, do not remove jumps from such blocks. + , + /* isRemovableJmpCandidate */ !GetEmitter()->emitIsLastInsCall() && !block->hasAlign() +#else +#ifdef TARGET_XARCH + , + /* isRemovableJmpCandidate */ !block->hasAlign() +#endif + +#endif + ); FALLTHROUGH; case BBJ_COND: diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 6302e12440b3a..039d4c1118522 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1436,6 +1436,30 @@ void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstRe } } +//------------------------------------------------------------------------ +// inst_JMP: Generate a jump instruction. +// +void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock, bool isRemovableJmpCandidate) +{ +#if !FEATURE_FIXED_OUT_ARGS + // On the x86 we are pushing (and changing the stack level), but on x64 and other archs we have + // a fixed outgoing args area that we store into and we never change the stack level when calling methods. + // + // Thus only on x86 do we need to assert that the stack level at the target block matches the current stack level. + // + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef UNIX_X86_ABI + // bbTgtStkDepth is a (pure) argument count (stack alignment padding should be excluded). + assert((tgtBlock->bbTgtStkDepth * sizeof(int) == (genStackLevel - curNestedAlignment)) || isFramePointerUsed()); +#else + assert((tgtBlock->bbTgtStkDepth * sizeof(int) == genStackLevel) || isFramePointerUsed()); +#endif +#endif // !FEATURE_FIXED_OUT_ARGS + + GetEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmp), tgtBlock, 0, isRemovableJmpCandidate); +} + //------------------------------------------------------------------------ // genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. // diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index b9d013073892b..ebd4f2120585a 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1006,6 +1006,15 @@ insGroup* emitter::emitSavIG(bool emitAdd) assert(emitLastIns != nullptr); assert(emitCurIGfreeBase <= (BYTE*)emitLastIns); assert((BYTE*)emitLastIns < emitCurIGfreeBase + sz); + +#if defined(TARGET_XARCH) + assert(emitLastIns != nullptr); + if (emitLastIns->idIns() == INS_jmp) + { + ig->igFlags |= IGF_HAS_REMOVABLE_JMP; + } +#endif + emitLastIns = (instrDesc*)((BYTE*)id + ((BYTE*)emitLastIns - (BYTE*)emitCurIGfreeBase)); } @@ -1073,10 +1082,11 @@ void emitter::emitBegFN(bool hasFramePtr emitJumpList = emitJumpLast = nullptr; emitCurIGjmpList = nullptr; - emitFwdJumps = false; - emitNoGCRequestCount = 0; - emitNoGCIG = false; - emitForceNewIG = false; + emitFwdJumps = false; + emitNoGCRequestCount = 0; + emitNoGCIG = false; + emitForceNewIG = false; + emitContainsRemovableJmpCandidates = false; #if FEATURE_LOOP_ALIGN /* We don't have any align instructions */ @@ -3811,10 +3821,21 @@ void emitter::emitDispIG(insGroup* ig, insGroup* igPrev, bool verbose) { instrDesc* id = (instrDesc*)ins; +#ifdef TARGET_XARCH + if (emitJmpInstHasNoCode(id)) + { + // an instruction with no code prevents us being able to iterate to the + // next instructions so we must be certain that when we find one it is + // the last instruction in a group + assert(cnt == 1); + break; + } +#endif emitDispIns(id, false, true, false, ofs, nullptr, 0, ig); ins += emitSizeOfInsDsc(id); ofs += id->idCodeSize(); + } while (--cnt); printf("\n"); @@ -3864,6 +3885,29 @@ void emitter::emitDispGCinfo() printf("\n\n"); } +//------------------------------------------------------------------------ +// emitDispJumpList: displays the current emitter jump list +// +void emitter::emitDispJumpList() +{ + printf("Emitter Jump List:\n"); + unsigned int jmpCount = 0; + for (instrDescJmp* jmp = emitJumpList; jmp != nullptr; jmp = jmp->idjNext) + { + printf("IG%02u IN%04x %3s[%u] -> IG%02u %s\n", jmp->idjIG->igNum, jmp->idDebugOnlyInfo()->idNum, + codeGen->genInsDisplayName(jmp), jmp->idCodeSize(), + ((insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel))->igNum, +#if defined(TARGET_XARCH) + jmp->idjIsRemovableJmpCandidate ? " ; removal candidate" : "" +#else + "" +#endif + ); + jmpCount += 1; + } + printf(" total jump count: %u\n", jmpCount); +} + #endif // DEBUG /***************************************************************************** @@ -4102,6 +4146,205 @@ void emitter::emitDispCommentForHandle(size_t handle, GenTreeFlags flag) #endif // DEBUG } +//**************************************************************************** +// emitRemoveJumpToNextInst: Checks all jumps in the jump list to see if they are +// unconditional jumps marked with idjIsRemovableJmpCandidate,generated from +// BBJ_ALWAYS blocks. Any such candidate that jumps to the next instruction +// will be removed from the jump list. +// +// Assumptions: +// the jump list must be ordered by increasing igNum+insNo +// +void emitter::emitRemoveJumpToNextInst() +{ +#ifdef TARGET_XARCH + if (!emitContainsRemovableJmpCandidates) + { + return; + } + + JITDUMP("*************** In emitRemoveJumpToNextInst()\n"); +#ifdef DEBUG + if (EMIT_INSTLIST_VERBOSE) + { + JITDUMP("\nInstruction group list before unconditional jump to next instruction removal:\n\n"); + emitDispIGlist(true); + } + if (EMITVERBOSE) + { + emitDispJumpList(); + } +#endif // DEBUG + + UNATIVE_OFFSET totalRemovedSize = 0; + instrDescJmp* jmp = emitJumpList; + instrDescJmp* previousJmp = nullptr; +#if DEBUG + UNATIVE_OFFSET previousJumpIgNum = (UNATIVE_OFFSET)-1; + unsigned int previousJumpInsNum = -1; +#endif // DEBUG + + while (jmp) + { + insGroup* jmpGroup = jmp->idjIG; + instrDescJmp* nextJmp = jmp->idjNext; + + if (jmp->idInsFmt() == IF_LABEL && emitIsUncondJump(jmp) && jmp->idjIsRemovableJmpCandidate) + { +#if DEBUG + assert((jmpGroup->igFlags & IGF_HAS_ALIGN) == 0); + assert((jmpGroup->igNum > previousJumpIgNum) || (previousJumpIgNum == (UNATIVE_OFFSET)-1) || + ((jmpGroup->igNum == previousJumpIgNum) && (jmp->idDebugOnlyInfo()->idNum > previousJumpInsNum))); + previousJumpIgNum = jmpGroup->igNum; + previousJumpInsNum = jmp->idDebugOnlyInfo()->idNum; +#endif // DEBUG + + // target group is not bound yet so use the cookie to fetch it + insGroup* targetGroup = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel); + + assert(targetGroup != nullptr); + + if ((jmpGroup->igNext == targetGroup) && ((jmpGroup->igFlags & IGF_HAS_REMOVABLE_JMP) != 0)) + { + // the last instruction in the group is the jmp we're looking for + // and it jumps to the next instruction group so we don't need it + CLANG_FORMAT_COMMENT_ANCHOR + +#ifdef DEBUG + unsigned instructionCount = jmpGroup->igInsCnt; + assert(instructionCount > 0); + instrDesc* id = nullptr; + { + BYTE* dataPtr = jmpGroup->igData; + while (instructionCount > 0) + { + id = (instrDesc*)dataPtr; + dataPtr += emitSizeOfInsDsc(id); + instructionCount -= 1; + } + } + assert(id != nullptr); + if (jmp != id) + { + printf("jmp != id, dumping context information\n"); + printf("method: %s\n", emitComp->impInlineRoot()->info.compMethodName); + printf(" jmp: %u: ", jmp->idDebugOnlyInfo()->idNum); + emitDispIns(jmp, false, true, false, 0, nullptr, 0, jmpGroup); + printf(" id: %u: ", id->idDebugOnlyInfo()->idNum); + emitDispIns(id, false, true, false, 0, nullptr, 0, jmpGroup); + printf("jump group:\n"); + emitDispIG(jmpGroup, nullptr, true); + printf("target group:\n"); + emitDispIG(targetGroup, nullptr, false); + assert(jmp == id); + } +#endif // DEBUG + + JITDUMP("IG%02u IN%04x is the last instruction in the group and jumps to the next instruction group " + "IG%02u %s, removing.\n", + jmpGroup->igNum, jmp->idDebugOnlyInfo()->idNum, targetGroup->igNum, + emitLabelString(targetGroup)); + + // Unlink the jump from emitJumpList while keeping the previousJmp the same. + if (previousJmp != nullptr) + { + previousJmp->idjNext = jmp->idjNext; + if (jmp == emitJumpLast) + { + emitJumpLast = previousJmp; + } + } + else + { + assert(jmp == emitJumpList); + emitJumpList = jmp->idjNext; + } + + UNATIVE_OFFSET codeSize = jmp->idCodeSize(); + jmp->idCodeSize(0); + + jmpGroup->igSize -= (unsigned short)codeSize; + jmpGroup->igFlags |= IGF_UPD_ISZ; + + emitTotalCodeSize -= codeSize; + totalRemovedSize += codeSize; + } + else + { + // Update the previousJmp + previousJmp = jmp; +#if DEBUG + if (targetGroup == nullptr) + { + JITDUMP("IG%02u IN%04x jump target is not set!, keeping.\n", jmpGroup->igNum, + jmp->idDebugOnlyInfo()->idNum); + } + else if ((jmpGroup->igFlags & IGF_HAS_ALIGN) != 0) + { + JITDUMP("IG%02u IN%04x containing instruction group has alignment, keeping.\n", jmpGroup->igNum, + jmp->idDebugOnlyInfo()->idNum); + } + else if (jmpGroup->igNext != targetGroup) + { + JITDUMP("IG%02u IN%04x does not jump to the next instruction group, keeping.\n", jmpGroup->igNum, + jmp->idDebugOnlyInfo()->idNum); + } + else if ((jmpGroup->igFlags & IGF_HAS_REMOVABLE_JMP) == 0) + { + JITDUMP("IG%02u IN%04x containing instruction group is not marked with IGF_HAS_REMOVABLE_JMP, " + "keeping.\n", + jmpGroup->igNum, jmp->idDebugOnlyInfo()->idNum); + } +#endif // DEBUG + } + } + else + { + // Update the previousJmp + previousJmp = jmp; + } + + if (totalRemovedSize > 0) + { + insGroup* adjOffIG = jmpGroup->igNext; + insGroup* adjOffUptoIG = nextJmp != nullptr ? nextJmp->idjIG : emitIGlast; + while ((adjOffIG != nullptr) && (adjOffIG->igNum <= adjOffUptoIG->igNum)) + { + JITDUMP("Adjusted offset of IG%02u from %04X to %04X\n", adjOffIG->igNum, adjOffIG->igOffs, + (adjOffIG->igOffs - totalRemovedSize)); + adjOffIG->igOffs -= totalRemovedSize; + adjOffIG = adjOffIG->igNext; + } + } + + jmp = nextJmp; + } + +#ifdef DEBUG + if (totalRemovedSize > 0) + { + emitCheckIGoffsets(); + + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nInstruction group list after unconditional jump to next instruction removal:\n\n"); + emitDispIGlist(false); + } + if (EMITVERBOSE) + { + emitDispJumpList(); + } + + JITDUMP("emitRemoveJumpToNextInst removed %u bytes of unconditional jumps\n", totalRemovedSize); + } + else + { + JITDUMP("emitRemoveJumpToNextInst removed no unconditional jumps\n"); + } +#endif // DEBUG +#endif // TARGET_XARCH +} + /***************************************************************************** * Bind targets of relative jumps to choose the smallest possible encoding. * X86 and AMD64 have a small and large encoding. @@ -4126,6 +4369,10 @@ void emitter::emitJumpDistBind() printf("\nInstruction list before jump distance binding:\n\n"); emitDispIGlist(true); } + if (EMITVERBOSE) + { + emitDispJumpList(); + } #endif instrDescJmp* jmp; @@ -6617,7 +6864,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #ifdef DEBUG if (emitComp->opts.disAsm || emitComp->verbose) { - printf("\t\t\t\t\t\t;; size=%d bbWeight=%s PerfScore %.2f", ig->igSize, refCntWtd2str(ig->igWeight), + printf("\t\t\t\t\t\t;; size=%d bbWeight=%s PerfScore %.2f", (cp - bp), refCntWtd2str(ig->igWeight), ig->igPerfScore); } *instrCount += ig->igInsCnt; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 18d57d1379079..fc90bd96d6909 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -279,6 +279,7 @@ struct insGroup // IG, or, if this IG contains with an unconditional branch, some subsequent IG. #define IGF_REMOVED_ALIGN 0x0800 // IG was marked as having an alignment instruction(s), but was later unmarked // without updating the IG's size/offsets. +#define IGF_HAS_REMOVABLE_JMP 0x1000 // this group ends with an unconditional jump which is a candidate for removal // Mask of IGF_* flags that should be propagated to new blocks when they are created. // This allows prologs and epilogs to be any number of IGs, but still be @@ -1534,13 +1535,21 @@ class emitter BYTE* idjAddr; // address of jump ins (for patching) } idjTemp; - unsigned idjOffs : 30; // Before jump emission, this is the byte offset within IG of the jump instruction. - // After emission, for forward jumps, this is the target offset -- in bytes from the - // beginning of the function -- of the target instruction of the jump, used to - // determine if this jump needs to be patched. - unsigned idjShort : 1; // is the jump known to be a short one? - unsigned idjKeepLong : 1; // should the jump be kept long? (used for - // hot to cold and cold to hot jumps) + // Before jump emission, this is the byte offset within IG of the jump instruction. + // After emission, for forward jumps, this is the target offset -- in bytes from the + // beginning of the function -- of the target instruction of the jump, used to + // determine if this jump needs to be patched. + unsigned idjOffs : +#if defined(TARGET_XARCH) + 29; + // indicates that the jump was added at the end of a BBJ_ALWAYS basic block and is + // a candidate for being removed if it jumps to the next instruction + unsigned idjIsRemovableJmpCandidate : 1; +#else + 30; +#endif + unsigned idjShort : 1; // is the jump known to be a short one? + unsigned idjKeepLong : 1; // should the jump be kept long? (used for hot to cold and cold to hot jumps) }; #if FEATURE_LOOP_ALIGN @@ -1723,6 +1732,7 @@ class emitter void emitDispIG(insGroup* ig, insGroup* igPrev = nullptr, bool verbose = false); void emitDispIGlist(bool verbose = false); void emitDispGCinfo(); + void emitDispJumpList(); void emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc = false); void emitDispFrameRef(int varx, int disp, int offs, bool asmfm); void emitDispInsAddr(BYTE* code); @@ -2001,6 +2011,8 @@ class emitter instrDescJmp* emitJumpList; // list of local jumps in method instrDescJmp* emitJumpLast; // last of local jumps in method void emitJumpDistBind(); // Bind all the local jumps in method + bool emitContainsRemovableJmpCandidates; + void emitRemoveJumpToNextInst(); // try to remove unconditional jumps to the next instruction #if FEATURE_LOOP_ALIGN instrDescAlign* emitCurIGAlignList; // list of align instructions in current IG @@ -2132,6 +2144,12 @@ class emitter void emitEnableGC(); #endif // !defined(JIT32_GCENCODER) +#if defined(TARGET_XARCH) + static bool emitAlignInstHasNoCode(instrDesc* id); + static bool emitInstHasNoCode(instrDesc* id); + static bool emitJmpInstHasNoCode(instrDesc* id); +#endif + void emitGenIG(insGroup* ig); insGroup* emitSavIG(bool emitAdd = false); void emitNxtIG(bool extend = false); diff --git a/src/coreclr/jit/emitarm.h b/src/coreclr/jit/emitarm.h index 9dd2c175190dc..09133a1d88f02 100644 --- a/src/coreclr/jit/emitarm.h +++ b/src/coreclr/jit/emitarm.h @@ -192,6 +192,12 @@ inline static unsigned getBitWidth(emitAttr size) return (unsigned)size * BITS_PER_BYTE; } +/************************************************************************/ +/* Output target-independent instructions */ +/************************************************************************/ + +void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0); + /************************************************************************/ /* The public entry points to output instructions */ /************************************************************************/ diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 0279b3360e75d..8970c15b3c090 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -705,6 +705,12 @@ inline static ssize_t computeRelPageAddr(size_t dstAddr, size_t srcAddr) return (dstAddr >> 12) - (srcAddr >> 12); } +/************************************************************************/ +/* Output target-independent instructions */ +/************************************************************************/ + +void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0); + /************************************************************************/ /* The public entry points to output instructions */ /************************************************************************/ diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h index 02ab3bb879d6f..6fc2a78707417 100644 --- a/src/coreclr/jit/emitpub.h +++ b/src/coreclr/jit/emitpub.h @@ -72,12 +72,6 @@ UNATIVE_OFFSET emitCodeOffset(void* blockPtr, unsigned codeOffs); const char* emitOffsetToLabel(unsigned offs); #endif // DEBUG -/************************************************************************/ -/* Output target-independent instructions */ -/************************************************************************/ - -void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0); - /************************************************************************/ /* Emit initialized data sections */ /************************************************************************/ diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index a2f486d80cc8b..707c8fc25f7c3 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -2059,19 +2059,47 @@ const emitJumpKind emitReverseJumpKinds[] = { return emitReverseJumpKinds[jumpKind]; } -/***************************************************************************** - * The size for these instructions is less than EA_4BYTE, - * but the target register need not be byte-addressable - */ +//------------------------------------------------------------------------ +// emitAlignInstHasNoCode: Returns true if the 'id' is an align instruction +// that was later removed and hence has codeSize==0. +// +// Arguments: +// id -- The instruction to check +// +/* static */ bool emitter::emitAlignInstHasNoCode(instrDesc* id) +{ + return (id->idIns() == INS_align) && (id->idCodeSize() == 0); +} -inline bool emitInstHasNoCode(instruction ins) +//------------------------------------------------------------------------ +// emitJmpInstHasNoCode: Returns true if the 'id' is a jump instruction +// that was later removed and hence has codeSize==0. +// +// Arguments: +// id -- The instruction to check +// +/* static */ bool emitter::emitJmpInstHasNoCode(instrDesc* id) { - if (ins == INS_align) - { - return true; - } + bool result = (id->idIns() == INS_jmp) && (id->idCodeSize() == 0); - return false; + // A zero size jump instruction can only be the one that is marked + // as removable candidate. + assert(!result || ((instrDescJmp*)id)->idjIsRemovableJmpCandidate); + + return result; +} + +//------------------------------------------------------------------------ +// emitInstHasNoCode: Returns true if the 'id' is an instruction +// that was later removed and hence has codeSize==0. +// Currently it is one of `align` or `jmp`. +// +// Arguments: +// id -- The instruction to check +// +/* static */ bool emitter::emitInstHasNoCode(instrDesc* id) +{ + return emitAlignInstHasNoCode(id) || emitJmpInstHasNoCode(id); } /***************************************************************************** @@ -7464,7 +7492,10 @@ void emitter::emitSetShortJump(instrDescJmp* id) * to jump: positive is forward, negative is backward. */ -void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */) +void emitter::emitIns_J(instruction ins, + BasicBlock* dst, + int instrCount /* = 0 */, + bool isRemovableJmpCandidate /* = false */) { UNATIVE_OFFSET sz; instrDescJmp* id = emitNewInstrJmp(); @@ -7493,7 +7524,9 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 } #endif // DEBUG - id->idjShort = 0; + emitContainsRemovableJmpCandidates |= isRemovableJmpCandidate; + id->idjIsRemovableJmpCandidate = isRemovableJmpCandidate ? 1 : 0; + id->idjShort = 0; if (dst != nullptr) { /* Assume the jump will be long */ @@ -9000,7 +9033,7 @@ void emitter::emitDispIns( /* By now the size better be set to something */ - assert(id->idCodeSize() || emitInstHasNoCode(ins)); + assert(id->idCodeSize() || emitInstHasNoCode(id)); /* Figure out the operand size */ @@ -13190,7 +13223,6 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id) * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that * needs to get bound to an actual address and processed by branch shortening. */ - BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) { unsigned srcOffs; @@ -13665,11 +13697,18 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RWR_LABEL: case IF_SWR_LABEL: assert(id->idGCref() == GCT_NONE); - assert(id->idIsBound()); + assert(id->idIsBound() || emitJmpInstHasNoCode(id)); // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()? - dst = emitOutputLJ(ig, dst, id); - sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp)); + if (id->idCodeSize() != 0) + { + dst = emitOutputLJ(ig, dst, id); + } + else + { + assert(((instrDescJmp*)id)->idjIsRemovableJmpCandidate); + } + sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp)); break; case IF_METHOD: @@ -14685,13 +14724,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert((int)emitCurStackLvl >= 0); - // Only epilog "instructions" and some pseudo-instrs - // are allowed not to generate any code + // Only epilog "instructions", some pseudo-instrs and blocks that ends with a jump to the next block - assert(*dp != dst || emitInstHasNoCode(ins)); + assert(*dp != dst || emitInstHasNoCode(id)); #ifdef DEBUG - if (emitComp->opts.disAsm || emitComp->verbose) + if ((emitComp->opts.disAsm || emitComp->verbose) && !emitJmpInstHasNoCode(id)) { emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp)); } @@ -15462,6 +15500,20 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case INS_jmp: + if (emitInstHasNoCode(id)) + { + // a removed jmp to the next instruction + result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; + result.insLatency = PERFSCORE_LATENCY_ZERO; + } + else + { + // branch to a constant address + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_BRANCH_DIRECT; + } + break; + case INS_l_jmp: // branch to a constant address result.insThroughput = PERFSCORE_THROUGHPUT_2C; diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 2f218cfcb0dcd..754632bb1c089 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -301,6 +301,12 @@ inline emitAttr emitDecodeScale(unsigned ensz) return emitter::emitSizeDecode[ensz]; } +/************************************************************************/ +/* Output target-independent instructions */ +/************************************************************************/ + +void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0, bool isRemovableJmpCandidate = false); + /************************************************************************/ /* The public entry points to output instructions */ /************************************************************************/ diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index ae2831a3bdca7..9843482bbef6c 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -256,32 +256,6 @@ bool CodeGenInterface::instIsFP(instruction ins) #endif } -/***************************************************************************** - * - * Generate a jump instruction. - */ - -void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock) -{ -#if !FEATURE_FIXED_OUT_ARGS - // On the x86 we are pushing (and changing the stack level), but on x64 and other archs we have - // a fixed outgoing args area that we store into and we never change the stack level when calling methods. - // - // Thus only on x86 do we need to assert that the stack level at the target block matches the current stack level. - // - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef UNIX_X86_ABI - // bbTgtStkDepth is a (pure) argument count (stack alignment padding should be excluded). - assert((tgtBlock->bbTgtStkDepth * sizeof(int) == (genStackLevel - curNestedAlignment)) || isFramePointerUsed()); -#else - assert((tgtBlock->bbTgtStkDepth * sizeof(int) == genStackLevel) || isFramePointerUsed()); -#endif -#endif // !FEATURE_FIXED_OUT_ARGS - - GetEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmp), tgtBlock); -} - /***************************************************************************** * * Generate a set instruction.