From f95a76b58ec0d9af3179a6104eebf4744f42d778 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 2 May 2024 11:24:50 +0200 Subject: [PATCH] JIT: Process all register kills in bulk in LSRA (#101760) `RefTypeKill` is one of the most common types of ref position because we create a separate one for every register whenever registers are killed, and registers are killed with very high frequency (for example, all callee trashed registers at calls). This can be seen in metrics: in `libraries_tests.run`, `RefTypeKill` is as common as `RefTypeUse` and `RefTypeDef` combined on x64, and 3x as common as them combined on arm64 due to having more registers. The main complication around changing `RefTypeKill` to represent killing a full set of registers is the fact that we want to be able to easily look up the next `RefPosition` at which a particular register is killed or used. Today, this is very simple: all kills (`RefTypeKill`) and uses (`RefTypeFixedReg`) participate in an intrusive linked list, and finding the next `RefPosition` is as simple as following this list. This PR changes LSRA to model the kills in bulk instead of creating an individual `RefPosition` for every kill. To handle finding the next fixed reg ref position LSRA is updated to take into account that there can be `RefPosition`'s corresponding to a register from two different linked lists: the `RefTypeFixedReg` intrusive linked list, and also a linked list of all `RefTypeKill` ref positions. The latter linked list may take some searching (not every kill necessarily kills the register we are looking for), but the reduction in number of `RefPosition`'s created more than makes up for this. No codegen diffs, but significant throughput improvements. This change reduces memory usage of the JIT by around 12% for arm64, and by around 7% for x64. --- src/coreclr/jit/lsra.cpp | 264 ++++++++++++++++++-------------- src/coreclr/jit/lsra.h | 17 +- src/coreclr/jit/lsraarmarch.cpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 33 ++-- src/coreclr/jit/lsraxarch.cpp | 2 +- 5 files changed, 173 insertions(+), 145 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index efd861348223d..8f0875d7b09fc 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -271,20 +271,31 @@ regMaskTP LinearScan::lowSIMDRegs() #endif } -void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition) +void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition, RefPosition* nextKill) { - LsraLocation nextLocation; + LsraLocation nextLocation = nextRefPosition == nullptr ? MaxLocation : nextRefPosition->nodeLocation; - if (nextRefPosition == nullptr) + RefPosition* kill = nextKill; + while ((kill != nullptr) && (kill->nodeLocation < nextLocation)) + { + if ((kill->registerAssignment & genRegMask(regRecord->regNum)) != RBM_NONE) + { + nextLocation = kill->nodeLocation; + break; + } + + kill = kill->nextRefPosition; + } + + if (nextLocation == MaxLocation) { - nextLocation = MaxLocation; fixedRegs &= ~genRegMask(regRecord->regNum); } else { - nextLocation = nextRefPosition->nodeLocation; fixedRegs |= genRegMask(regRecord->regNum); } + nextFixedRef[regRecord->regNum] = nextLocation; } @@ -712,6 +723,8 @@ LinearScan::LinearScan(Compiler* theCompiler) , intervals(theCompiler->getAllocator(CMK_LSRA_Interval)) , allocationPassComplete(false) , refPositions(theCompiler->getAllocator(CMK_LSRA_RefPosition)) + , killHead(nullptr) + , killTail(&killHead) , listNodePool(theCompiler) { availableRegCount = ACTUAL_REG_COUNT; @@ -3948,6 +3961,41 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio } } +//------------------------------------------------------------------------ +// processKills: Handle that some registers are being killed. +// +// Arguments: +// killRefPosition - The RefPosition for the kill +// +void LinearScan::processKills(RefPosition* killRefPosition) +{ + RefPosition* nextKill = killRefPosition->nextRefPosition; + + regMaskTP killedRegs = killRefPosition->registerAssignment; + while (killedRegs != RBM_NONE) + { + regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); + RegRecord* regRecord = getRegisterRecord(killedReg); + Interval* assignedInterval = regRecord->assignedInterval; + if (assignedInterval != nullptr) + { + unassignPhysReg(regRecord, assignedInterval->recentRefPosition); + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + makeRegAvailable(regRecord->regNum, assignedInterval->registerType); + } + + assert((nextFixedRef[killedReg] == killRefPosition->nodeLocation) || (killedReg >= AVAILABLE_REG_COUNT)); + RefPosition* regNextRefPos = regRecord->recentRefPosition == nullptr + ? regRecord->firstRefPosition + : regRecord->recentRefPosition->nextRefPosition; + updateNextFixedRef(regRecord, regNextRefPos, nextKill); + } + + regsBusyUntilKill &= ~killRefPosition->registerAssignment; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE, + killRefPosition->registerAssignment)); +} + //------------------------------------------------------------------------ // spillGCRefs: Spill any GC-type intervals that are currently in registers. // @@ -4865,7 +4913,7 @@ void LinearScan::allocateRegistersMinimal() { RegRecord* physRegRecord = getRegisterRecord(reg); physRegRecord->recentRefPosition = nullptr; - updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition); + updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition, killHead); assert(physRegRecord->assignedInterval == nullptr); } @@ -4886,7 +4934,8 @@ void LinearScan::allocateRegistersMinimal() } #endif // DEBUG - BasicBlock* currentBlock = nullptr; + BasicBlock* currentBlock = nullptr; + RefPosition* nextKill = killHead; LsraLocation prevLocation = MinLocation; regMaskTP regsToFree = RBM_NONE; @@ -4906,8 +4955,6 @@ void LinearScan::allocateRegistersMinimal() for (RefPosition& currentRefPosition : refPositions) { - RefPosition* nextRefPosition = currentRefPosition.nextRefPosition; - // TODO: Can we combine this with the freeing of registers below? It might // mess with the dump, since this was previously being done before the call below // to dumpRegRecords. @@ -5017,7 +5064,7 @@ void LinearScan::allocateRegistersMinimal() } else { - assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs)); + assert((refType == RefTypeBB) || (refType == RefTypeKill) || (refType == RefTypeKillGCRefs)); } #ifdef DEBUG @@ -5066,65 +5113,59 @@ void LinearScan::allocateRegistersMinimal() continue; } + if (refType == RefTypeKill) + { + assert(nextKill == ¤tRefPosition); + processKills(¤tRefPosition); + nextKill = nextKill->nextRefPosition; + continue; + } + if (refType == RefTypeKillGCRefs) { spillGCRefs(¤tRefPosition); continue; } - if (currentRefPosition.isPhysRegRef) + if (refType == RefTypeFixedReg) { RegRecord* regRecord = currentRefPosition.getReg(); Interval* assignedInterval = regRecord->assignedInterval; - updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition); + updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition, nextKill); - // If this is a FixedReg, disassociate any inactive constant interval from this register. - // Otherwise, do nothing. - if (refType == RefTypeFixedReg) + // This is a FixedReg. Disassociate any inactive constant interval from this register. + if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) { - if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) - { - clearConstantReg(regRecord->regNum, assignedInterval->registerType); - regRecord->assignedInterval = nullptr; - spillCost[regRecord->regNum] = 0; + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + regRecord->assignedInterval = nullptr; + spillCost[regRecord->regNum] = 0; #ifdef TARGET_ARM - // Update overlapping floating point register for TYP_DOUBLE - if (assignedInterval->registerType == TYP_DOUBLE) - { - RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); - assert(otherRegRecord->assignedInterval == assignedInterval); - otherRegRecord->assignedInterval = nullptr; - spillCost[otherRegRecord->regNum] = 0; - } -#endif // TARGET_ARM - } - regsInUseThisLocation |= currentRefPosition.registerAssignment; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); - -#ifdef SWIFT_SUPPORT - if (currentRefPosition.delayRegFree) + // Update overlapping floating point register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) { - regsInUseNextLocation |= currentRefPosition.registerAssignment; + RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); + assert(otherRegRecord->assignedInterval == assignedInterval); + otherRegRecord->assignedInterval = nullptr; + spillCost[otherRegRecord->regNum] = 0; } -#endif // SWIFT_SUPPORT +#endif // TARGET_ARM } - else + regsInUseThisLocation |= currentRefPosition.registerAssignment; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); + +#ifdef SWIFT_SUPPORT + if (currentRefPosition.delayRegFree) { - assert(refType == RefTypeKill); - if (assignedInterval != nullptr) - { - unassignPhysReg(regRecord, assignedInterval->recentRefPosition); - clearConstantReg(regRecord->regNum, assignedInterval->registerType); - makeRegAvailable(regRecord->regNum, assignedInterval->registerType); - } - clearRegBusyUntilKill(regRecord->regNum); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum)); + regsInUseNextLocation |= currentRefPosition.registerAssignment; } +#endif // SWIFT_SUPPORT continue; } + assert(!currentRefPosition.isPhysRegRef); + regNumber assignedRegister = REG_NA; assert(currentRefPosition.isIntervalRef()); @@ -5532,7 +5573,7 @@ void LinearScan::allocateRegisters() { RegRecord* physRegRecord = getRegisterRecord(reg); physRegRecord->recentRefPosition = nullptr; - updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition); + updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition, killHead); // Is this an incoming arg register? (Note that we don't, currently, consider reassigning // an incoming arg register as having spill cost.) @@ -5573,7 +5614,8 @@ void LinearScan::allocateRegisters() } #endif // DEBUG - BasicBlock* currentBlock = nullptr; + BasicBlock* currentBlock = nullptr; + RefPosition* nextKill = killHead; LsraLocation prevLocation = MinLocation; regMaskTP regsToFree = RBM_NONE; @@ -5725,7 +5767,7 @@ void LinearScan::allocateRegisters() } else { - assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs)); + assert((refType == RefTypeBB) || (refType == RefTypeKill) || (refType == RefTypeKillGCRefs)); } #ifdef DEBUG @@ -5781,62 +5823,54 @@ void LinearScan::allocateRegisters() continue; } + if (refType == RefTypeKill) + { + assert(nextKill == ¤tRefPosition); + processKills(¤tRefPosition); + nextKill = nextKill->nextRefPosition; + continue; + } + if (refType == RefTypeKillGCRefs) { spillGCRefs(¤tRefPosition); continue; } - if (currentRefPosition.isPhysRegRef) + if (refType == RefTypeFixedReg) { RegRecord* regRecord = currentRefPosition.getReg(); Interval* assignedInterval = regRecord->assignedInterval; - updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition); + updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition, nextKill); - // If this is a FixedReg, disassociate any inactive constant interval from this register. - // Otherwise, do nothing. - if (refType == RefTypeFixedReg) + // This is a FixedReg. Disassociate any inactive constant interval from this register. + if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) { - if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) - { - clearConstantReg(regRecord->regNum, assignedInterval->registerType); - regRecord->assignedInterval = nullptr; - spillCost[regRecord->regNum] = 0; + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + regRecord->assignedInterval = nullptr; + spillCost[regRecord->regNum] = 0; #ifdef TARGET_ARM - // Update overlapping floating point register for TYP_DOUBLE - if (assignedInterval->registerType == TYP_DOUBLE) - { - RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); - assert(otherRegRecord->assignedInterval == assignedInterval); - otherRegRecord->assignedInterval = nullptr; - spillCost[otherRegRecord->regNum] = 0; - } -#endif // TARGET_ARM - } - regsInUseThisLocation |= currentRefPosition.registerAssignment; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); - -#ifdef SWIFT_SUPPORT - if (currentRefPosition.delayRegFree) + // Update overlapping floating point register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) { - regsInUseNextLocation |= currentRefPosition.registerAssignment; + RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); + assert(otherRegRecord->assignedInterval == assignedInterval); + otherRegRecord->assignedInterval = nullptr; + spillCost[otherRegRecord->regNum] = 0; } -#endif // SWIFT_SUPPORT +#endif // TARGET_ARM } - else + regsInUseThisLocation |= currentRefPosition.registerAssignment; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); + +#ifdef SWIFT_SUPPORT + if (currentRefPosition.delayRegFree) { - assert(refType == RefTypeKill); - if (assignedInterval != nullptr) - { - unassignPhysReg(regRecord, assignedInterval->recentRefPosition); - clearConstantReg(regRecord->regNum, assignedInterval->registerType); - makeRegAvailable(regRecord->regNum, assignedInterval->registerType); - } - clearRegBusyUntilKill(regRecord->regNum); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum)); + regsInUseNextLocation |= currentRefPosition.registerAssignment; } +#endif // SWIFT_SUPPORT continue; } @@ -7972,7 +8006,6 @@ void LinearScan::resolveRegisters() case RefTypeDef: // These are the ones we're interested in break; - case RefTypeKill: case RefTypeFixedReg: // These require no handling at resolution time assert(currentRefPosition->referent != nullptr); @@ -7988,6 +8021,7 @@ void LinearScan::resolveRegisters() currentRefPosition->getInterval()->getVarIndex(compiler))); currentRefPosition->referent->recentRefPosition = currentRefPosition; continue; + case RefTypeKill: case RefTypeKillGCRefs: // No action to take at resolution time, and no interval to update recentRefPosition for. continue; @@ -10937,7 +10971,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) printf("\n Kill: "); killPrinted = true; } - printf(getRegName(currentRefPosition->assignedReg())); + compiler->dumpRegMask(currentRefPosition->registerAssignment); printf(" "); break; case RefTypeFixedReg: @@ -10965,8 +10999,12 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) printf("\n\n"); } -void LinearScan::dumpLsraAllocationEvent( - LsraDumpEvent event, Interval* interval, regNumber reg, BasicBlock* currentBlock, RegisterScore registerScore) +void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, + Interval* interval, + regNumber reg, + BasicBlock* currentBlock, + RegisterScore registerScore, + regMaskTP regMask) { if (!(VERBOSE)) { @@ -11171,6 +11209,15 @@ void LinearScan::dumpLsraAllocationEvent( printf("UVRes %-4s ", getRegName(reg)); break; + case LSRA_EVENT_KILL_REGS: + dumpRefPositionShort(activeRefPosition, currentBlock); + printf("None "); + dspRegMask(regMask); + printf("\n"); + dumpRefPositionShort(activeRefPosition, currentBlock); + printf(" "); + break; + // We currently don't dump anything for these events. case LSRA_EVENT_DEFUSE_FIXED_DELAY_USE: case LSRA_EVENT_SPILL_EXTENDED_LIFETIME: @@ -11560,7 +11607,7 @@ void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* curr } else { - assert(refPosition->refType == RefTypeKillGCRefs); + assert((refPosition->refType == RefTypeKill) || (refPosition->refType == RefTypeKillGCRefs)); // There's no interval or reg name associated with this. printf(regNameFormat, " "); printf(" %s ", getRefTypeShortName(refPosition->refType)); @@ -11726,8 +11773,6 @@ void LinearScan::verifyFreeRegisters(regMaskTP regsToFree) assert(nextIntervalRef[reg] == MaxLocation); assert(spillCost[reg] == 0); } - LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); - assert(nextFixedRef[reg] == thisNextFixedRef); #ifdef TARGET_ARM // If this is occupied by a double interval, skip the corresponding float reg. if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) @@ -11933,10 +11978,10 @@ void LinearScan::verifyFinalAllocation() break; case RefTypeKill: - assert(regRecord != nullptr); - assert(regRecord->assignedInterval == nullptr); - dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock); + dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, currentBlock, NONE, + currentRefPosition.registerAssignment); break; + case RefTypeFixedReg: assert(regRecord != nullptr); dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock); @@ -13258,18 +13303,13 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) && !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment)) { - regNumber defReg = refPosition->assignedReg(); - RegRecord* defRegRecord = linearScan->getRegisterRecord(defReg); - - RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; - assert(currFixedRegRefPosition != nullptr && - currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation); + regNumber defReg = refPosition->assignedReg(); // If there is another fixed reference to this register before the use, change the candidates // on this RefPosition to include that of nextRefPos. - RefPosition* nextFixedRegRefPosition = defRegRecord->getNextRefPosition(); - if (nextFixedRegRefPosition != nullptr && - nextFixedRegRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) + // TODO-Quirk: Should pass right type here, but previously this didn't consider TYP_DOUBLE case for arm32. + unsigned nextFixedRegRefLocation = linearScan->getNextFixedRef(defReg, TYP_I_IMPL); + if (nextFixedRegRefLocation <= nextRefPos->getRefEndLocation()) { candidates |= nextRefPos->registerAssignment; if (preferences == refPosition->registerAssignment) @@ -13724,18 +13764,12 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) && !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment)) { - regNumber defReg = refPosition->assignedReg(); - RegRecord* defRegRecord = linearScan->getRegisterRecord(defReg); - - RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; - assert(currFixedRegRefPosition != nullptr && - currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation); + regNumber defReg = refPosition->assignedReg(); // If there is another fixed reference to this register before the use, change the candidates // on this RefPosition to include that of nextRefPos. - RefPosition* nextFixedRegRefPosition = defRegRecord->getNextRefPosition(); - if (nextFixedRegRefPosition != nullptr && - nextFixedRegRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) + unsigned nextFixedRegRefLocation = linearScan->getNextFixedRef(defReg, TYP_I_IMPL); + if (nextFixedRegRefLocation <= nextRefPos->getRefEndLocation()) { candidates |= nextRefPos->registerAssignment; } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 797c9d69c91d8..fe09a2e456441 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1083,8 +1083,7 @@ class LinearScan : public LinearScanInterface // insert refpositions representing prolog zero-inits which will be added later void insertZeroInitRefPositions(); - // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions - void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse); + void addKillForRegs(regMaskTP mask, LsraLocation currentLoc); void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition); @@ -1262,6 +1261,7 @@ class LinearScan : public LinearScanInterface void setIntervalAsSplit(Interval* interval); void spillInterval(Interval* interval, RefPosition* fromRefPosition DEBUGARG(RefPosition* toRefPosition)); + void processKills(RefPosition* killRefPosition); void spillGCRefs(RefPosition* killRefPosition); /***************************************************************************** @@ -1581,6 +1581,7 @@ class LinearScan : public LinearScanInterface LSRA_EVENT_FREE_REGS, LSRA_EVENT_UPPER_VECTOR_SAVE, LSRA_EVENT_UPPER_VECTOR_RESTORE, + LSRA_EVENT_KILL_REGS, // Characteristics of the current RefPosition LSRA_EVENT_INCREMENT_RANGE_END, // ??? @@ -1606,7 +1607,8 @@ class LinearScan : public LinearScanInterface Interval* interval = nullptr, regNumber reg = REG_NA, BasicBlock* currentBlock = nullptr, - RegisterScore registerScore = NONE); + RegisterScore registerScore = NONE, + regMaskTP regMask = RBM_NONE); void validateIntervals(); @@ -1733,6 +1735,11 @@ class LinearScan : public LinearScanInterface // Ordered list of RefPositions RefPositionList refPositions; + // Head of linked list of RefTypeKill ref positions + RefPosition* killHead; + // Tail slot of linked list of RefTypeKill ref positions + RefPosition** killTail; + // Per-block variable location mappings: an array indexed by block number that yields a // pointer to an array of regNumber, one per variable. VarToRegMap* inVarToRegMaps; @@ -1897,7 +1904,7 @@ class LinearScan : public LinearScanInterface regMaskTP fixedRegs; LsraLocation nextFixedRef[REG_COUNT]; - void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition); + void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition, RefPosition* nextKill); LsraLocation getNextFixedRef(regNumber regNum, var_types regType) { LsraLocation loc = nextFixedRef[regNum]; @@ -2717,7 +2724,7 @@ class RefPosition bool IsPhysRegRef() { - return ((refType == RefTypeFixedReg) || (refType == RefTypeKill)); + return (refType == RefTypeFixedReg); } void setRegOptional(bool val) diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index f88173e05e209..cf5302dee9c71 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -419,7 +419,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // We could use RefTypeKill, but RefTypeFixedReg is used less commonly, so the check for delayRegFree // during register allocation should be cheaper in terms of TP. - RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc, RefTypeFixedReg, call, RBM_SWIFT_ERROR); + RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc + 1, RefTypeFixedReg, call, RBM_SWIFT_ERROR); setDelayFree(pos); } #endif // SWIFT_SUPPORT diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 22cba5900a7dd..4a4af7725dd4d 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -498,7 +498,7 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp) } else { - assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs)); + assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs) || (rp->refType == RefTypeKill)); } } @@ -570,7 +570,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, } else { - assert(theRefType == RefTypeBB || theRefType == RefTypeKillGCRefs); + assert(theRefType == RefTypeBB || theRefType == RefTypeKillGCRefs || theRefType == RefTypeKill); } #ifdef DEBUG if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) @@ -689,18 +689,14 @@ bool LinearScan::isContainableMemoryOp(GenTree* node) } //------------------------------------------------------------------------ -// addRefsForPhysRegMask: Adds RefPositions of the given type for all the registers in 'mask'. +// addKillForRegs: Adds a RefTypeKill ref position for the given registers. // // Arguments: // mask - the mask (set) of registers. // currentLoc - the location at which they should be added -// refType - the type of refposition -// isLastUse - true IFF this is a last use of the register // -void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) +void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) { - assert(refType == RefTypeKill); - // The mask identifies a set of registers that will be used during // codegen. Mark these as modified here, so when we do final frame // layout, we'll know about all these registers. This is especially @@ -712,19 +708,10 @@ void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, // modified until codegen, which is too late. compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); - for (regMaskTP candidates = mask; candidates != RBM_NONE;) - { - regNumber reg = genFirstRegNumFromMaskAndToggle(candidates); - // This assumes that these are all "special" RefTypes that - // don't need to be recorded on the tree (hence treeNode is nullptr) - RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr, - genRegMask(reg)); // This MUST occupy the physical register (obviously) + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask); - if (isLastUse) - { - pos->lastUse = true; - } - } + *killTail = pos; + killTail = &pos->nextRefPosition; } //------------------------------------------------------------------------ @@ -1127,7 +1114,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo if (killMask != RBM_NONE) { - addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true); + addKillForRegs(killMask, currentLoc); // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee // save regs on infrequently executed paths. However, it results in a large number of asmDiffs, @@ -2546,7 +2533,7 @@ void LinearScan::buildIntervals() if ((block == compiler->fgFirstBB) && compiler->lvaHasAnySwiftStackParamToReassemble()) { assert(compiler->fgFirstBBisScratch()); - addRefsForPhysRegMask(genRegMask(REG_SCRATCH), currentLoc + 1, RefTypeKill, true); + addKillForRegs(genRegMask(REG_SCRATCH), currentLoc + 1); currentLoc += 2; } @@ -2564,7 +2551,7 @@ void LinearScan::buildIntervals() // Poisoning uses REG_SCRATCH for small vars and memset helper for big vars. killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_NATIVE_MEMSET); #endif - addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true); + addKillForRegs(killed, currentLoc + 1); currentLoc += 2; } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 6abc86aab0ed5..7109c9087ac4e 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1395,7 +1395,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // We could use RefTypeKill, but RefTypeFixedReg is used less commonly, so the check for delayRegFree // during register allocation should be cheaper in terms of TP. - RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc, RefTypeFixedReg, call, RBM_SWIFT_ERROR); + RefPosition* pos = newRefPosition(REG_SWIFT_ERROR, currentLoc + 1, RefTypeFixedReg, call, RBM_SWIFT_ERROR); setDelayFree(pos); } #endif // SWIFT_SUPPORT