From 46e8c657d11d251a0d295fe0191256e6602c0d87 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Mar 2023 14:08:11 +0100 Subject: [PATCH 01/52] JIT: Prototype a generalized promotion pass --- src/coreclr/jit/CMakeLists.txt | 2 + src/coreclr/jit/compiler.cpp | 2 + src/coreclr/jit/compiler.h | 9 +- src/coreclr/jit/compmemkind.h | 1 + src/coreclr/jit/compphases.h | 1 + src/coreclr/jit/fgstmt.cpp | 6 +- src/coreclr/jit/gentree.cpp | 4 +- src/coreclr/jit/jitconfigvalues.h | 5 +- src/coreclr/jit/jitstd/vector.h | 2 +- src/coreclr/jit/loopcloning.h | 2 +- src/coreclr/jit/morphblock.cpp | 2 +- src/coreclr/jit/promotion.cpp | 1101 +++++++++++++++++++++++++++++ src/coreclr/jit/promotion.h | 25 + src/coreclr/jit/rationalize.cpp | 9 + 14 files changed, 1161 insertions(+), 10 deletions(-) create mode 100644 src/coreclr/jit/promotion.cpp create mode 100644 src/coreclr/jit/promotion.h diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 480bfdc045e16..330a4c9bd76d9 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -153,6 +153,7 @@ set( JIT_SOURCES optimizer.cpp patchpoint.cpp phase.cpp + promotion.cpp rangecheck.cpp rationalize.cpp redundantbranchopts.cpp @@ -334,6 +335,7 @@ set( JIT_HEADERS objectalloc.h opcode.h phase.h + promotion.h rangecheck.h rationalize.h regalloc.h diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 13177d5275396..efbde3144009b 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4704,6 +4704,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_EARLY_LIVENESS, &Compiler::fgEarlyLiveness); + DoPhase(this, PHASE_NEW_PROMOTE_STRUCTS, &Compiler::PromoteStructsNew); + // Run a simple forward substitution pass. // DoPhase(this, PHASE_FWD_SUB, &Compiler::fgForwardSub); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ac83843c895fa..543d435f7976b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2028,6 +2028,9 @@ class Compiler friend class CallArgs; friend class IndirectCallTransformer; friend class ProfileSynthesis; + friend class LocalsUseVisitor; + friend class Promotion; + friend class ReplaceVisitor; #ifdef FEATURE_HW_INTRINSICS friend struct HWIntrinsicInfo; @@ -2447,7 +2450,7 @@ class Compiler GenTree* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1); // For binary opers. - GenTree* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2); + GenTreeOp* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2); GenTreeCC* gtNewCC(genTreeOps oper, var_types type, GenCondition cond); GenTreeOpCC* gtNewOperCC(genTreeOps oper, var_types type, GenCondition cond, GenTree* op1, GenTree* op2); @@ -5721,9 +5724,9 @@ class Compiler private: void fgInsertStmtNearEnd(BasicBlock* block, Statement* stmt); void fgInsertStmtAtBeg(BasicBlock* block, Statement* stmt); - void fgInsertStmtAfter(BasicBlock* block, Statement* insertionPoint, Statement* stmt); public: + void fgInsertStmtAfter(BasicBlock* block, Statement* insertionPoint, Statement* stmt); void fgInsertStmtBefore(BasicBlock* block, Statement* insertionPoint, Statement* stmt); private: @@ -6059,6 +6062,8 @@ class Compiler PhaseStatus fgMarkAddressExposedLocals(); void fgSequenceLocals(Statement* stmt); + PhaseStatus PromoteStructsNew(); + PhaseStatus fgForwardSub(); bool fgForwardSubBlock(BasicBlock* block); bool fgForwardSubStatement(Statement* statement); diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index 03a8f56d28bc9..645a6b44f80ee 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -50,6 +50,7 @@ CompMemKindMacro(LoopHoist) CompMemKindMacro(Unknown) CompMemKindMacro(RangeCheck) CompMemKindMacro(CopyProp) +CompMemKindMacro(Promotion) CompMemKindMacro(SideEffects) CompMemKindMacro(ObjectAllocator) CompMemKindMacro(VariableLiveRanges) diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index c3f96621e80e6..6b92d80cdb440 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -43,6 +43,7 @@ CompPhaseNameMacro(PHASE_UPDATE_FINALLY_FLAGS, "Update finally target flag CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH, "Update flow graph early pass", false, -1, false) CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", false, -1, false) CompPhaseNameMacro(PHASE_EARLY_LIVENESS, "Early liveness", false, -1, false) +CompPhaseNameMacro(PHASE_NEW_PROMOTE_STRUCTS, "Generalized promotion", false, -1, false) CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Substitution", false, -1, false) CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", false, -1, false) CompPhaseNameMacro(PHASE_PROMOTE_STRUCTS, "Morph - Promote Structs", false, -1, false) diff --git a/src/coreclr/jit/fgstmt.cpp b/src/coreclr/jit/fgstmt.cpp index 2269e19896308..4b8fff128399f 100644 --- a/src/coreclr/jit/fgstmt.cpp +++ b/src/coreclr/jit/fgstmt.cpp @@ -386,11 +386,15 @@ Statement* Compiler::fgNewStmtFromTree(GenTree* tree, BasicBlock* block, const D { Statement* stmt = gtNewStmt(tree, di); - if (fgNodeThreading != NodeThreading::None) + if (fgNodeThreading == NodeThreading::AllTrees) { gtSetStmtInfo(stmt); fgSetStmtSeq(stmt); } + else if (fgNodeThreading == NodeThreading::AllLocals) + { + fgSequenceLocals(stmt); + } #if DEBUG if (block != nullptr) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 64d0a7281b5be..e49bb0e8b5450 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -6941,7 +6941,7 @@ void GenTree::SetVtableForOper(genTreeOps oper) } #endif // DEBUGGABLE_GENTREE -GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2) +GenTreeOp* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2) { assert(op1 != nullptr); assert(op2 != nullptr); @@ -6950,7 +6950,7 @@ GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, // should call the appropriate constructor for the extended type. assert(!GenTree::IsExOp(GenTree::OperKind(oper))); - GenTree* node = new (this, oper) GenTreeOp(oper, type, op1, op2); + GenTreeOp* node = new (this, oper) GenTreeOp(oper, type, op1, op2); return node; } diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index caad83a958e28..871f83b9ddc36 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -129,8 +129,9 @@ CONFIG_INTEGER(JitNoHoist, W("JitNoHoist"), 0) CONFIG_INTEGER(JitNoInline, W("JitNoInline"), 0) // Disables inlining of all methods CONFIG_INTEGER(JitNoMemoryBarriers, W("JitNoMemoryBarriers"), 0) // If 1, don't generate memory barriers CONFIG_INTEGER(JitNoRegLoc, W("JitNoRegLoc"), 0) -CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables struct promotion 1 - for all, 2 - for - // params. +CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables struct promotion 1 - for all, 2 - for + // params. +CONFIG_INTEGER(JitNewStructPromotion, W("JitNewStructPromotion"), 1) // Use new struct promotion CONFIG_INTEGER(JitNoUnroll, W("JitNoUnroll"), 0) CONFIG_INTEGER(JitOrder, W("JitOrder"), 0) CONFIG_INTEGER(JitQueryCurrentStaticFieldClass, W("JitQueryCurrentStaticFieldClass"), 1) diff --git a/src/coreclr/jit/jitstd/vector.h b/src/coreclr/jit/jitstd/vector.h index 6a547be93c0b8..268ce3a0c43e8 100644 --- a/src/coreclr/jit/jitstd/vector.h +++ b/src/coreclr/jit/jitstd/vector.h @@ -733,7 +733,7 @@ void vector::insert_elements_helper(iterator iter, size_type size, ensure_capacity(m_nSize + size); - for (int src = m_nSize - 1, dst = m_nSize + size - 1; src >= (int) pos; --src, --dst) + for (int src = (int)(m_nSize - 1), dst = (int)(m_nSize + size - 1); src >= (int) pos; --src, --dst) { m_pArray[dst] = m_pArray[src]; } diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 9ab83531d0f52..cf8ceb0dce9c9 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -885,7 +885,7 @@ struct LoopCloneContext // Ensure that the block condition is present, if not allocate space. JitExpandArrayStack*>* EnsureBlockConditions(unsigned loopNum, - unsigned totalBlocks); + unsigned totalBlocks); #ifdef DEBUG // Print the block conditions for the loop. diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index dd71007036756..a60de6ff2be74 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -778,7 +778,7 @@ void MorphCopyBlockHelper::TrySpecialCases() m_transformationDecision = BlockTransformation::SkipMultiRegSrc; m_result = m_asg; } - else if (m_src->IsCall() && m_dst->OperIs(GT_LCL_VAR) && m_dstVarDsc->CanBeReplacedWithItsField(m_comp)) + else if (m_src->gtEffectiveVal()->IsCall() && m_dst->OperIs(GT_LCL_VAR) && m_dstVarDsc->CanBeReplacedWithItsField(m_comp)) { JITDUMP("Not morphing a single reg call return\n"); m_transformationDecision = BlockTransformation::SkipSingleRegCallSrc; diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp new file mode 100644 index 0000000000000..6f059a414c4bd --- /dev/null +++ b/src/coreclr/jit/promotion.cpp @@ -0,0 +1,1101 @@ +#include "jitpch.h" +#include "promotion.h" +#include "jitstd/algorithm.h" + +PhaseStatus Compiler::PromoteStructsNew() +{ + if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE)) + { + return PhaseStatus::MODIFIED_NOTHING; + } + +#ifdef DEBUG + if (JitConfig.JitNewStructPromotion() == 0) + { + return PhaseStatus::MODIFIED_NOTHING; + } +#endif + + Promotion prom(this); + return prom.Run(); +} + +struct Access +{ + ClassLayout* Layout; + unsigned Offset; + var_types AccessType; + + // Number of times we saw this access. + unsigned Count = 0; + // Number of times this access is on the RHS of an assignment where the LHS is a probable register candidate. + unsigned CountAssignmentsToRegisterCandidate = 0; + // Number of times this access is on the LHS of an assignment where the RHS is a probable register candidate. + unsigned CountAssignmentsFromRegisterCandidate = 0; + unsigned CountCallArgs = 0; + unsigned CountCallArgsByImplicitRef = 0; + unsigned CountCallArgsOnStack = 0; + unsigned CountReturns = 0; + unsigned CountPassedAsRetbuf = 0; + + Access( + unsigned offset, var_types accessType, ClassLayout* layout) + : Layout(layout), Offset(offset), AccessType(accessType) + { + } + + unsigned GetAccessSize() const + { + return AccessType == TYP_STRUCT ? Layout->GetSize() : genTypeSize(AccessType); + } + + bool Overlaps(unsigned otherStart, unsigned otherSize) const + { + unsigned end = Offset + GetAccessSize(); + if (end <= otherStart) + { + return false; + } + + unsigned otherEnd = otherStart + otherSize; + if (otherEnd <= Offset) + { + return false; + } + + return true; + } + +}; + +// Find first entry with an equal offset, or bitwise complement of first +// entry with a higher offset. +template +static size_t BinarySearch(const jitstd::vector& vec, unsigned offset) +{ + size_t min = 0; + size_t max = vec.size(); + while (min < max) + { + size_t mid = min + (max - min) / 2; + if (vec[mid].*field == offset) + { + while (mid > 0 && vec[mid - 1].*field == offset) + { + mid--; + } + + return mid; + } + if (vec[mid].*field < offset) + { + min = mid + 1; + } + else + { + max = mid; + } + } + + return ~min; +} + +struct Replacement +{ + unsigned Offset; + var_types AccessType; + unsigned LclNum; + ClassLayout* Layout; + bool NeedsWriteBack = true; + bool NeedsReadBack = false; + Replacement* Next = nullptr; + + Replacement(unsigned offset, var_types accessType, unsigned lclNum, ClassLayout* layout) + : Offset(offset), AccessType(accessType), LclNum(lclNum), Layout(layout) + { + } + + bool Overlaps(unsigned otherStart, unsigned otherSize) const + { + unsigned end = Offset + genTypeSize(AccessType); + if (end <= otherStart) + { + return false; + } + + unsigned otherEnd = otherStart + otherSize; + if (otherEnd <= Offset) + { + return false; + } + + return true; + } +}; + +enum class AccessKindFlags : uint32_t +{ + None = 0, + IsCallArg = 1, + IsAssignmentSource = 2, + IsAssignmentDestination = 4, + IsAssignmentToRegisterCandidate = 8, + IsAssignmentFromRegisterCandidate = 16, + IsCallArgByImplicitRef = 32, + IsCallArgOnStack = 64, + IsCallRetBuf = 128, + IsReturned = 256, +}; + +inline constexpr AccessKindFlags operator~(AccessKindFlags a) +{ + return (AccessKindFlags)(~(uint32_t)a); +} + +inline constexpr AccessKindFlags operator|(AccessKindFlags a, AccessKindFlags b) +{ + return (AccessKindFlags)((uint32_t)a | (uint32_t)b); +} + +inline constexpr AccessKindFlags operator&(AccessKindFlags a, AccessKindFlags b) +{ + return (AccessKindFlags)((uint32_t)a & (uint32_t)b); +} + +inline AccessKindFlags& operator|=(AccessKindFlags& a, AccessKindFlags b) +{ + return a = (AccessKindFlags)((uint32_t)a | (uint32_t)b); +} + +inline AccessKindFlags& operator&=(AccessKindFlags& a, AccessKindFlags b) +{ + return a = (AccessKindFlags)((uint32_t)a & (uint32_t)b); +} + +class LocalsUses +{ + jitstd::vector m_accesses; + +public: + LocalsUses(Compiler* comp) : m_accesses(comp->getAllocator(CMK_Promotion)) + { + } + + void RecordAccess(unsigned offs, var_types accessType, ClassLayout* accessLayout, AccessKindFlags flags) + { + Access* access = nullptr; + + size_t index = 0; + if (m_accesses.size() > 0) + { + index = BinarySearch(m_accesses, offs); + if ((ssize_t)index >= 0) + { + do + { + Access& candidateAccess = m_accesses[index]; + if (candidateAccess.AccessType == accessType) + { + // Some operations on SIMD types do not require a layout, but those can be merged with ones that do. + bool isMergeableLayout = + (candidateAccess.Layout == accessLayout) || + (varTypeIsSIMD(accessType) && (candidateAccess.Layout == nullptr)); + + if (isMergeableLayout) + { + access = &candidateAccess; + access->Layout = accessLayout; + break; + } + } + + index++; + } while (index < m_accesses.size() && m_accesses[index].Offset == offs); + } + else + { + index = ~index; + } + } + + if (access == nullptr) + { + m_accesses.insert(m_accesses.begin() + index, Access(offs, accessType, accessLayout)); + access = &m_accesses.back(); + } + + access->Count++; + AccessKindFlags assignmentTo = AccessKindFlags::IsAssignmentSource | AccessKindFlags::IsAssignmentToRegisterCandidate; + if ((flags & assignmentTo) == assignmentTo) + { + access->CountAssignmentsToRegisterCandidate++; + } + + AccessKindFlags assignmentFrom = AccessKindFlags::IsAssignmentDestination | AccessKindFlags::IsAssignmentFromRegisterCandidate; + if ((flags & assignmentFrom) == assignmentFrom) + { + access->CountAssignmentsFromRegisterCandidate++; + } + + if ((flags & AccessKindFlags::IsCallArg) != AccessKindFlags::None) + { + access->CountCallArgs++; + + if ((flags & AccessKindFlags::IsCallArgByImplicitRef) != AccessKindFlags::None) + { + access->CountCallArgsByImplicitRef++; + } + + if ((flags & AccessKindFlags::IsCallArgOnStack) != AccessKindFlags::None) + { + access->CountCallArgsOnStack++; + } + } + + if ((flags & AccessKindFlags::IsCallRetBuf) != AccessKindFlags::None) + { + access->CountPassedAsRetbuf++; + } + + if ((flags & AccessKindFlags::IsReturned) != AccessKindFlags::None) + { + access->CountReturns++; + } + } + + void PickPromotions(Compiler* comp, unsigned lclNum, jitstd::vector& replacements) + { + if (m_accesses.size() <= 0) + { + return; + } + + //jitstd::sort( + // m_accesses.begin(), m_accesses.end(), + // [](const Access& l, const Access& r) + // { + // if (l.Offset < r.Offset) + // { + // return true; + // } + + // if (l.Offset > r.Offset) + // { + // return false; + // } + + // if (l.Count > r.Count) + // { + // return true; + // } + + // if (l.Count < r.Count) + // { + // return false; + // } + + // // TODO: Better heuristics, use info about struct uses to decrease benefit of promotions + // if (r.AccessType == TYP_STRUCT) + // { + // if (l.AccessType == TYP_STRUCT) + // { + // return l.Layout < r.Layout; + // } + + // return true; + // } + + // if (l.AccessType == TYP_STRUCT) + // { + // return false; + // } + + // assert(l.AccessType != r.AccessType); + // return l.AccessType < r.AccessType; + // }); + + assert(replacements.size() == 0); + for (size_t i = 0; i < m_accesses.size(); i++) + { + const Access& access = m_accesses[i]; + + if (access.AccessType == TYP_STRUCT) + { + continue; + } + + if (varTypeIsSIMD(access.AccessType) && (access.Layout == nullptr)) + { + // We need a layout to create a local. + // TODO: Use canonical layout? + continue; + } + + if (!EvaluateReplacement(comp, lclNum, access)) + { + continue; + } + +#ifdef DEBUG + char buf[32]; + sprintf_s(buf, sizeof(buf), "V%02u.[%03u..%03u)", lclNum, access.Offset, access.Offset + genTypeSize(access.AccessType)); + size_t len = strlen(buf) + 1; + char* bufp = new (comp, CMK_DebugOnly) char[len]; + strcpy_s(bufp, len, buf); +#endif + unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); + if (varTypeIsStruct(access.AccessType)) + { + comp->lvaSetStruct(newLcl, access.Layout->GetClassHandle(), false); + } + else + { + comp->lvaGetDesc(newLcl)->lvType = access.AccessType; + } + + replacements.push_back(Replacement(access.Offset, access.AccessType, newLcl, access.Layout)); + } + } + + bool EvaluateReplacement(Compiler* comp, unsigned lclNum, const Access& access) + { + unsigned countOverlappedCalls = 0; + unsigned countOverlappedReturns = 0; + unsigned countOverlappedRetbufs = 0; + bool overlap = false; + for (const Access& otherAccess : m_accesses) + { + if (&otherAccess == &access) + continue; + + if (!otherAccess.Overlaps(access.Offset, genTypeSize(access.AccessType))) + { + continue; + } + + if (otherAccess.AccessType != TYP_STRUCT) + { + return false; + } + + countOverlappedCalls += otherAccess.CountCallArgs; + countOverlappedReturns += otherAccess.CountReturns; + countOverlappedRetbufs += otherAccess.CountPassedAsRetbuf; + } + + unsigned costWithout = 0; + // A normal access without promotion looks like: + // mov reg, [reg+offs] + // Which is 5 bytes on x64. + + costWithout += access.Count * 50; + + // The register then also needs to be used. In many cases it is containable, however, so we pay a bit less for this. + costWithout += access.Count * 25; + + unsigned costWith = 0; + + // For any use we expect to just use the register directly. + costWith += access.Count * 25; + + unsigned numReadBacks = 0; + LclVarDsc* lcl = comp->lvaGetDesc(lclNum); + // For parameters we need an initial read back + if (lcl->lvIsParam) + { + numReadBacks++; + } + + numReadBacks += countOverlappedRetbufs; + costWith += numReadBacks * 50; + + // Write backs with TYP_REFs when the base local is an implicit byref + // involves checked write barriers, so they are very expensive. + // TODO-CQ: This should be adjusted once we type implicit byrefs as TYP_I_IMPL. + unsigned writeBackCost = comp->lvaIsImplicitByRefLocal(lclNum) && (access.AccessType == TYP_REF) ? 150 : 50; + unsigned numWriteBacks = countOverlappedCalls + countOverlappedReturns; + costWith += numWriteBacks * writeBackCost; + + JITDUMP("Evaluating access %s @ %03u\n", varTypeName(access.AccessType), access.Offset); + JITDUMP(" Write-back cost: %u\n", writeBackCost); + JITDUMP(" # write backs: %u\n", numWriteBacks); + JITDUMP(" # read backs: %u\n", numReadBacks); + JITDUMP(" Cost with: %u\n", costWith); + JITDUMP(" Cost without: %u\n", costWithout); + + if (costWith < costWithout) + { + JITDUMP(" Promoting replacement\n"); + return true; + } + + JITDUMP(" Disqualifying replacement\n"); + return false; + } + +#ifdef DEBUG + void Dump(unsigned lclNum) + { + if (m_accesses.size() <= 0) + { + return; + } + + printf("Accesses for V%02u\n", lclNum); + for (Access& access : m_accesses) + { + if (access.AccessType == TYP_STRUCT) + { + printf(" [%03u..%03u)\n", access.Offset, access.Offset + access.Layout->GetSize()); + } + else + { + printf(" %s @ %03u\n", varTypeName(access.AccessType), access.Offset); + } + + printf(" #: %u\n", access.Count); + printf(" # assignments to reg-candidate: %u\n", access.CountAssignmentsToRegisterCandidate); + printf(" # as call arg: %u\n", access.CountCallArgs); + printf(" # as implicit by-ref call arg: %u\n", access.CountCallArgsByImplicitRef); + printf(" # as on-stack call arg: %u\n", access.CountCallArgsOnStack); + printf(" # as retbuf: %u\n", access.CountPassedAsRetbuf); + printf(" # as returned value: %u\n\n", access.CountReturns); + } + } +#endif +}; + +class LocalsUseVisitor : public GenTreeVisitor +{ + Promotion* m_prom; + LocalsUses* m_uses; +public: + enum + { + DoPreOrder = true, + }; + + LocalsUseVisitor(Promotion* prom) : GenTreeVisitor(prom->m_compiler), m_prom(prom) + { + m_uses = reinterpret_cast(new (prom->m_compiler, CMK_Promotion) char[prom->m_compiler->lvaCount * sizeof(LocalsUses)]); + for (size_t i = 0; i < prom->m_compiler->lvaCount; i++) + new (&m_uses[i], jitstd::placement_t()) LocalsUses(prom->m_compiler); + } + + LocalsUses* GetUsesByLocal(unsigned lcl) { return &m_uses[lcl]; } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + GenTree* tree = *use; + + if (tree->OperIsLocal()) + { + GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); + LclVarDsc* dsc = m_compiler->lvaGetDesc(lcl); + if (!dsc->lvPromoted && (dsc->TypeGet() == TYP_STRUCT) && !dsc->IsAddressExposed()) + { + if (lcl->OperIsLocalAddr()) + { + assert(user->OperIs(GT_CALL) && dsc->IsHiddenBufferStructArg() && (user->AsCall()->gtArgs.GetRetBufferArg()->GetNode() == lcl)); + // TODO: We should record that this is used as the address + // of a retbuf -- it makes promotion less desirable as we + // have to reload fields back from the retbuf. + } + else + { + unsigned offs = lcl->GetLclOffs(); + var_types accessType = lcl->TypeGet(); + ClassLayout* accessLayout = varTypeIsStruct(lcl) ? lcl->GetLayout(m_compiler) : nullptr; + AccessKindFlags accessFlags = ClassifyLocalAccess(lcl, user); + m_uses[lcl->GetLclNum()].RecordAccess(offs, accessType, accessLayout, accessFlags); + } + } + } + + return fgWalkResult::WALK_CONTINUE; + } + + AccessKindFlags ClassifyLocalAccess(GenTreeLclVarCommon* lcl, GenTree* user) + { + AccessKindFlags flags = AccessKindFlags::None; + if (user->IsCall()) + { + GenTreeCall* call = user->AsCall(); + if (call->IsOptimizingRetBufAsLocal() && (m_compiler->gtCallGetDefinedRetBufLclAddr(call) == lcl)) + { + flags |= AccessKindFlags::IsCallRetBuf; + } + else + { + unsigned argIndex = 0; + for (CallArg& arg : call->gtArgs.Args()) + { + if (arg.GetNode() != lcl) + { + argIndex++; + continue; + } + + flags |= AccessKindFlags::IsCallArg; + + unsigned argSize = 0; + if (arg.GetSignatureType() != TYP_STRUCT) + { + argSize = genTypeSize(arg.GetSignatureType()); + } + else + { + argSize = m_compiler->typGetObjLayout(arg.GetSignatureClassHandle())->GetSize(); + } + +#ifdef WINDOWS_AMD64_ABI + if ((argSize != 1) && (argSize != 2) && (argSize != 4) && (argSize != 8)) + { + flags |= AccessKindFlags::IsCallArgByImplicitRef; + } + + if (argIndex >= 4) + { + flags |= AccessKindFlags::IsCallArgOnStack; + } +#endif + break; + } + } + } + + if (user->OperIs(GT_ASG)) + { + if (user->gtGetOp1() == lcl) + { + flags |= AccessKindFlags::IsAssignmentDestination; + + if (user->gtGetOp2()->OperIs(GT_LCL_VAR)) + { + LclVarDsc* dsc = m_compiler->lvaGetDesc(user->gtGetOp2()->AsLclVarCommon()); + if (!dsc->IsAddressExposed()) + { + flags |= AccessKindFlags::IsAssignmentFromRegisterCandidate; + } + } + } + + if (user->gtGetOp2() == lcl) + { + flags |= AccessKindFlags::IsAssignmentSource; + + if (user->gtGetOp1()->OperIs(GT_LCL_VAR)) + { + LclVarDsc* dsc = m_compiler->lvaGetDesc(user->gtGetOp1()->AsLclVarCommon()); + if (!dsc->IsAddressExposed()) + { + flags |= AccessKindFlags::IsAssignmentToRegisterCandidate; + } + } + } + } + + if (user->OperIs(GT_RETURN)) + { + assert(user->gtGetOp1() == lcl); + flags |= AccessKindFlags::IsReturned; + } + + return flags; + } +}; + +class ReplaceVisitor : public GenTreeVisitor +{ + Promotion* m_prom; + jitstd::vector* m_replacements; + BasicBlock* m_bb; + Statement* m_stmt; + unsigned m_seenAsgs; + unsigned m_seenAsgToLcl; + bool m_madeChanges; + Statement* m_lastStmt; +public: + enum + { + DoPostOrder = true, + UseExecutionOrder = true, + }; + + ReplaceVisitor(Promotion* prom, jitstd::vector* replacements) : + GenTreeVisitor(prom->m_compiler), m_prom(prom), m_replacements(replacements) + { + } + + bool MadeChanges() { return m_madeChanges; } + Statement* GetNextStatement() { return m_lastStmt->GetNextStmt(); } + + void Reset(BasicBlock* bb, Statement* stmt) + { + m_bb = bb; + m_stmt = stmt; + m_seenAsgs = 0; + m_madeChanges = false; + m_lastStmt = stmt; + } + + fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + GenTree* tree = *use; + + // We handle all cases where we can see struct uses up front. + if (tree->OperIs(GT_ASG)) + { + // Assignments can be decomposed directly into accesses of the replacements. + return DecomposeAssignment(use, user); + } + + if (tree->OperIs(GT_CALL)) + { + // Calls need to store replacements back into the struct local for args + // and need to restore replacements from the result (for + // retbufs/returns). Lowering handles optimizing out the unnecessary copies. + return LoadStoreAroundCall(use, user); + } + + if (tree->OperIs(GT_RETURN)) + { + // Returns need to store replacements back into the struct local. + // Lowering will optimize away these copies when possible. + return StoreBeforeReturn((*use)->AsUnOp()); + } + + if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + ReplaceLocal(use, user); + return fgWalkResult::WALK_CONTINUE; + } + + return fgWalkResult::WALK_CONTINUE; + } + + fgWalkResult DecomposeAssignment(GenTree** use, GenTree* user) + { + GenTreeOp* asg = (*use)->AsOp(); + + // TODO-CQ: field-by-field copies and inits. + + if (asg->gtGetOp2()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + GenTreeLclVarCommon* rhsLcl = asg->gtGetOp2()->AsLclVarCommon(); + if (rhsLcl->TypeIs(TYP_STRUCT)) + { + unsigned size = rhsLcl->GetLayout(m_compiler)->GetSize(); + WriteBackBefore(use, rhsLcl->GetLclNum(), rhsLcl->GetLclOffs(), size); + } + } + + if (asg->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + GenTreeLclVarCommon* lhsLcl = asg->gtGetOp1()->AsLclVarCommon(); + if (lhsLcl->TypeIs(TYP_STRUCT)) + { + unsigned size = lhsLcl->GetLayout(m_compiler)->GetSize(); + ReadBackAfter(use, user, lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size); + } + } + + return fgWalkResult::WALK_CONTINUE; + } + + fgWalkResult LoadStoreAroundCall(GenTree** use, GenTree* user) + { + GenTreeCall* call = (*use)->AsCall(); + + CallArg* retBufArg = nullptr; + for (CallArg& arg : call->gtArgs.Args()) + { + if (arg.GetWellKnownArg() == WellKnownArg::RetBuffer) + { + retBufArg = &arg; + continue; + } + + if (!arg.GetNode()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + continue; + } + + GenTreeLclVarCommon* argNodeLcl = arg.GetNode()->AsLclVarCommon(); + + if (argNodeLcl->TypeIs(TYP_STRUCT)) + { + unsigned size = argNodeLcl->GetLayout(m_compiler)->GetSize(); + WriteBackBefore(use, argNodeLcl->GetLclNum(), argNodeLcl->GetLclOffs(), size); + } + } + + if (call->IsOptimizingRetBufAsLocal()) + { + assert(retBufArg != nullptr); + assert(retBufArg->GetNode()->OperIsLocalAddr()); + GenTreeLclVarCommon* retBufLcl = retBufArg->GetNode()->AsLclVarCommon(); + unsigned size = m_compiler->typGetObjLayout(call->gtRetClsHnd)->GetSize(); + + ReadBackAfter(use, user, retBufLcl->GetLclNum(), retBufLcl->GetLclOffs(), size); + } + + return fgWalkResult::WALK_CONTINUE; + } + + void ReplaceLocal(GenTree** use, GenTree* user) + { + GenTreeLclVarCommon* lcl = (*use)->AsLclVarCommon(); + unsigned lclNum = lcl->GetLclNum(); + jitstd::vector& replacements = m_replacements[lclNum]; + + if (replacements.size() <= 0) + { + return; + } + + unsigned offs = lcl->GetLclOffs(); + var_types accessType = lcl->TypeGet(); + +#ifdef DEBUG + if (accessType == TYP_STRUCT) + { + assert((user == nullptr) || user->OperIs(GT_ASG, GT_CALL, GT_RETURN)); + } + else + { + ClassLayout* accessLayout = varTypeIsStruct(accessType) ? lcl->GetLayout(m_compiler) : nullptr; + unsigned accessSize = accessLayout != nullptr ? accessLayout->GetSize() : genTypeSize(accessType); + for (const Replacement& rep : replacements) + { + assert(!rep.Overlaps(offs, accessSize) || ((rep.Offset == offs) && (rep.AccessType == accessType))); + } + + assert((accessType != TYP_STRUCT) || (accessLayout != nullptr)); + JITDUMP("Processing use [%06u] of V%02u.[%03u..%03u)\n", Compiler::dspTreeID(lcl), lclNum, offs, offs + accessSize); + } +#endif + + if (accessType != TYP_STRUCT) + { + size_t index = BinarySearch(replacements, offs); + if ((ssize_t)index >= 0) + { + Replacement& rep = replacements[index]; + assert(accessType == rep.AccessType); + JITDUMP(" ..replaced with promoted lcl V%02u\n", rep.LclNum); + *use = m_compiler->gtNewLclvNode(rep.LclNum, accessType); + + if ((lcl->gtFlags & GTF_VAR_DEF) != 0) + { + rep.NeedsWriteBack = true; + rep.NeedsReadBack = false; + } + else if (rep.NeedsReadBack) + { + GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); + *use = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), m_compiler->gtNewAssignNode(dst, src), *use); + rep.NeedsReadBack = false; + } + + m_madeChanges = true; + } + } + } + + fgWalkResult StoreBeforeReturn(GenTreeUnOp* ret) + { + if (ret->TypeIs(TYP_VOID) || !ret->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + return fgWalkResult::WALK_CONTINUE; + } + + GenTreeLclVarCommon* retLcl = ret->gtGetOp1()->AsLclVarCommon(); + if (retLcl->TypeIs(TYP_STRUCT)) + { + unsigned size = retLcl->GetLayout(m_compiler)->GetSize(); + WriteBackBefore(&ret->gtOp1, retLcl->GetLclNum(), retLcl->GetLclOffs(), size); + } + + return fgWalkResult::WALK_CONTINUE; + } + + // Write back all overlapping replacement fields in the specified range. + void WriteBackBefore(GenTree** use, unsigned lcl, unsigned offs, unsigned size) + { + jitstd::vector replacements = m_replacements[lcl]; + size_t index = BinarySearch(replacements, offs); + + if ((ssize_t)index < 0) + { + index = ~index; + if ((index > 0) && replacements[index - 1].Overlaps(offs, size)) + { + index--; + } + } + + unsigned end = offs + size; + while ((index < replacements.size()) && (replacements[index].Offset < end)) + { + Replacement& rep = replacements[index]; + if (rep.NeedsWriteBack) + { + GenTree* dst = m_compiler->gtNewLclFldNode(lcl, rep.AccessType, rep.Offset); + GenTree* src = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTreeOp* comma = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), m_compiler->gtNewAssignNode(dst, src), *use); + *use = comma; + use = &comma->gtOp2; + + rep.NeedsWriteBack = false; + m_madeChanges = true; + } + + index++; + } + } + + void ReadBackAfter(GenTree** use, GenTree* user, unsigned lcl, unsigned offs, unsigned size) + { + jitstd::vector& replacements = m_replacements[lcl]; + size_t index = BinarySearch(replacements, offs); + + if ((ssize_t)index < 0) + { + index = ~index; + if ((index > 0) && replacements[index - 1].Overlaps(offs, size)) + { + index--; + } + } + + unsigned end = offs + size; + while ((index < replacements.size()) && (replacements[index].Offset < end)) + { + Replacement& rep = replacements[index]; + assert((rep.Offset >= offs) && (rep.Offset + genTypeSize(rep.AccessType) <= end)); + rep.NeedsReadBack = true; + rep.NeedsWriteBack = false; + index++; + + //GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + //GenTree* src = m_compiler->gtNewLclFldNode(lcl, rep.AccessType, rep.Offset); + + //GenTreeOp* comma = m_compiler->gtNewOperNode(GT_COMMA, TYP_VOID, *use, m_compiler->gtNewAssignNode(dst, src)); + //*use = comma; + //use = &comma->gtOp2; + + //m_madeChanges = true; + } + } +}; + +PhaseStatus Promotion::Run() +{ +#ifdef DEBUG + if (m_compiler->verbose) + { + m_compiler->fgDispBasicBlocks(true); + } + + //if (!ISMETHOD("NPTest")) + //{ + // return PhaseStatus::MODIFIED_NOTHING; + //} +#endif + + LocalsUseVisitor localsUse(this); + for (BasicBlock* bb : m_compiler->Blocks()) + { + for (Statement* stmt : bb->Statements()) + { + DISPSTMT(stmt); + localsUse.WalkTree(stmt->GetRootNodePointer(), nullptr); + } + } + + unsigned numLocals = m_compiler->lvaCount; + +#ifdef DEBUG + if (m_compiler->verbose) + { + for (unsigned lcl = 0; lcl < m_compiler->lvaCount; lcl++) + { + LocalsUses* uses = localsUse.GetUsesByLocal(lcl); + uses->Dump(lcl); + } + } +#endif + + bool anyReplacements = false; + jitstd::vector* replacements = reinterpret_cast*>(new (m_compiler, CMK_Promotion) char[sizeof(jitstd::vector) * m_compiler->lvaCount]); + for (unsigned i = 0; i < numLocals; i++) + { + new (&replacements[i], jitstd::placement_t()) jitstd::vector(m_compiler->getAllocator(CMK_Promotion)); + + localsUse.GetUsesByLocal(i)->PickPromotions(m_compiler, i, replacements[i]); + if (replacements[i].size() > 0) + { + JITDUMP("V%02u promoted with %d replacements\n", i, (int)replacements[i].size()); + for (const Replacement& rep : replacements[i]) + { + JITDUMP(" [%03u..%03u) promoted as %s V%02u\n", rep.Offset, rep.Offset + genTypeSize(rep.AccessType), varTypeName(rep.AccessType), rep.LclNum); + anyReplacements = true; + } + } + } + + if (!anyReplacements) + { + return PhaseStatus::MODIFIED_NOTHING; + } + + ReplaceVisitor replacer(this, replacements); + for (BasicBlock* bb : m_compiler->Blocks()) + { + for (Statement* stmt = bb->firstStmt(); stmt != nullptr;) + { + DISPSTMT(stmt); + replacer.Reset(bb, stmt); + replacer.WalkTree(stmt->GetRootNodePointer(), nullptr); + + if (replacer.MadeChanges()) + { + m_compiler->fgSequenceLocals(stmt); + JITDUMP("New statement:\n"); + DISPSTMT(stmt); + } + + stmt = replacer.GetNextStatement(); + } + + for (unsigned i = 0; i < numLocals; i++) + { + for (unsigned j = 0; j < replacements[i].size(); j++) + { + Replacement& rep = replacements[i][j]; + assert(!rep.NeedsReadBack || !rep.NeedsWriteBack); + if (rep.NeedsReadBack) + { + JITDUMP("Reading back dirty replacement V%02.[%03u..%03u) -> V%02u at the end of " FMT_BB "\n", + i, + rep.Offset, rep.Offset + genTypeSize(rep.AccessType), + rep.LclNum); + + GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTree* src = m_compiler->gtNewLclFldNode(i, rep.AccessType, rep.Offset); + GenTree* asg = m_compiler->gtNewAssignNode(dst, src); + m_compiler->fgInsertStmtNearEnd(bb, m_compiler->fgNewStmtFromTree(asg)); + rep.NeedsReadBack = false; + } + + rep.NeedsWriteBack = true; + } + } + } + + Statement* prevParamLoadStmt = nullptr; + for (unsigned i = 0; i < m_compiler->info.compArgsCount; i++) + { + for (unsigned j = 0; j < replacements[i].size(); j++) + { + Replacement& rep = replacements[i][j]; + + m_compiler->fgEnsureFirstBBisScratch(); + + GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTree* src = m_compiler->gtNewLclFldNode(i, rep.AccessType, rep.Offset); + GenTree* asg = m_compiler->gtNewAssignNode(dst, src); + Statement* stmt = m_compiler->fgNewStmtFromTree(asg); + if (prevParamLoadStmt != nullptr) + { + m_compiler->fgInsertStmtAfter(m_compiler->fgFirstBB, prevParamLoadStmt, stmt); + } + else + { + m_compiler->fgInsertStmtAtBeg(m_compiler->fgFirstBB, stmt); + } + + prevParamLoadStmt = stmt; + } + } + + return PhaseStatus::MODIFIED_EVERYTHING; +} + +bool Promotion::ParseLocation(GenTree* tree, unsigned* lcl, unsigned* offs, var_types* accessType, ClassLayout** accessLayout) +{ + *offs = 0; + *accessType = TYP_UNDEF; + *accessLayout = nullptr; + + while (true) + { + if (tree->OperIsLocalRead()) + { + *lcl = tree->AsLclVarCommon()->GetLclNum(); + *offs += tree->AsLclVarCommon()->GetLclOffs(); + if (*accessType == TYP_UNDEF) + { + *accessType = tree->TypeGet(); + *accessLayout = varTypeIsStruct(tree) ? tree->AsLclVarCommon()->GetLayout(m_compiler) : nullptr; + } + + return true; + } + + if (tree->OperIs(GT_FIELD) || tree->OperIsIndir()) + { + if (tree->OperIs(GT_FIELD)) + { + if (tree->AsField()->GetFldObj() == nullptr) + { + return false; + } + + if (*accessType == TYP_UNDEF) + { + CORINFO_CLASS_HANDLE fieldClassHandle; + var_types fieldType = m_compiler->eeGetFieldType(tree->AsField()->gtFldHnd, &fieldClassHandle); + + *accessType = fieldType == TYP_STRUCT ? m_compiler->impNormStructType(fieldClassHandle) : fieldType; + *accessLayout = varTypeIsStruct(fieldType) ? m_compiler->typGetObjLayout(fieldClassHandle) : nullptr; + } + + *offs += tree->AsField()->gtFldOffset; + tree = tree->AsField()->GetFldObj(); + } + else + { + if (*accessType == TYP_UNDEF) + { + *accessType = tree->TypeGet(); + *accessLayout = tree->OperIsBlk() ? tree->AsBlk()->GetLayout() : nullptr; + } + + tree = tree->AsIndir()->Addr(); + } + + switch (tree->gtOper) + { + case GT_ADDR: + tree = tree->gtGetOp1(); + continue; + case GT_LCL_VAR_ADDR: + *lcl = tree->AsLclVarCommon()->GetLclNum(); + return true; + case GT_LCL_FLD_ADDR: + *lcl = tree->AsLclVarCommon()->GetLclNum(); + *offs += tree->AsLclVarCommon()->GetLclOffs(); + return true; + default: + return false; + } + } + else + { + return false; + } + } +} diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h new file mode 100644 index 0000000000000..7d6fbd78b3f61 --- /dev/null +++ b/src/coreclr/jit/promotion.h @@ -0,0 +1,25 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _PROMOTION_H +#define _PROMOTION_H + +class Compiler; + +class Promotion +{ + Compiler* m_compiler; + + friend class LocalsUseVisitor; + friend class ReplaceVisitor; + + bool ParseLocation(GenTree* tree, unsigned* lcl, unsigned* offs, var_types* accessType, ClassLayout** accessLayout); +public: + explicit Promotion(Compiler* compiler) : m_compiler(compiler) + { + } + + PhaseStatus Run(); +}; + +#endif diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index d0acce7b1c910..9b0cd4718dbd3 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -499,6 +499,15 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge use = LIR::Use(BlockRange(), useEdge, parentStack.Top(1)); } + if (node->TypeIs(TYP_STRUCT)) + { + if (!node->OperIs(GT_CALL, GT_LCL_VAR, GT_ASG, GT_OBJ, GT_BLK, GT_LCL_FLD, GT_RETURN, GT_COMMA, GT_IND)) + { + printf("%s\n", GenTree::OpName(node->OperGet())); + assert(!"Uh oh"); + } + } + assert(node == use.Def()); switch (node->OperGet()) { From faa8260ab47b80c6e4b7638734d0641a4e1b106f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Mar 2023 14:13:54 +0100 Subject: [PATCH 02/52] Remove some old code --- src/coreclr/jit/rationalize.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 9b0cd4718dbd3..d0acce7b1c910 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -499,15 +499,6 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge use = LIR::Use(BlockRange(), useEdge, parentStack.Top(1)); } - if (node->TypeIs(TYP_STRUCT)) - { - if (!node->OperIs(GT_CALL, GT_LCL_VAR, GT_ASG, GT_OBJ, GT_BLK, GT_LCL_FLD, GT_RETURN, GT_COMMA, GT_IND)) - { - printf("%s\n", GenTree::OpName(node->OperGet())); - assert(!"Uh oh"); - } - } - assert(node == use.Def()); switch (node->OperGet()) { From b4cf69250aa15df49b61c8d49690f2e3a24ce01b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 14 Mar 2023 16:21:27 +0100 Subject: [PATCH 03/52] Fix accounting mistake --- src/coreclr/jit/promotion.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 6f059a414c4bd..0a17920b884eb 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -220,8 +220,7 @@ class LocalsUses if (access == nullptr) { - m_accesses.insert(m_accesses.begin() + index, Access(offs, accessType, accessLayout)); - access = &m_accesses.back(); + access = &*m_accesses.insert(m_accesses.begin() + index, Access(offs, accessType, accessLayout)); } access->Count++; From 8c602eea8e18c814e4d27aef7be6cbc534179af7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 16 Mar 2023 11:12:43 +0100 Subject: [PATCH 04/52] Fix a JITDUMP --- src/coreclr/jit/promotion.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 0a17920b884eb..d679fa906dd03 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -977,10 +977,11 @@ PhaseStatus Promotion::Run() assert(!rep.NeedsReadBack || !rep.NeedsWriteBack); if (rep.NeedsReadBack) { - JITDUMP("Reading back dirty replacement V%02.[%03u..%03u) -> V%02u at the end of " FMT_BB "\n", + JITDUMP("Reading back replacement V%02u.[%03u..%03u) -> V%02u at the end of " FMT_BB "\n", i, rep.Offset, rep.Offset + genTypeSize(rep.AccessType), - rep.LclNum); + rep.LclNum, + bb->bbNum); GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); GenTree* src = m_compiler->gtNewLclFldNode(i, rep.AccessType, rep.Offset); From 294e7be87052532dd9b882434917713ed283fced Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 16 Mar 2023 11:13:06 +0100 Subject: [PATCH 05/52] Add overlapped assignment heuristic --- src/coreclr/jit/promotion.cpp | 48 +++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index d679fa906dd03..2e2742fe2641c 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -28,6 +28,10 @@ struct Access // Number of times we saw this access. unsigned Count = 0; + // Number of times this access is on the RHS of an assignment. + unsigned CountAssignmentSource = 0; + // Number of times this access is on the LHS of an assignment. + unsigned CountAssignmentDestination = 0; // Number of times this access is on the RHS of an assignment where the LHS is a probable register candidate. unsigned CountAssignmentsToRegisterCandidate = 0; // Number of times this access is on the LHS of an assignment where the RHS is a probable register candidate. @@ -224,16 +228,24 @@ class LocalsUses } access->Count++; - AccessKindFlags assignmentTo = AccessKindFlags::IsAssignmentSource | AccessKindFlags::IsAssignmentToRegisterCandidate; - if ((flags & assignmentTo) == assignmentTo) + if ((flags & AccessKindFlags::IsAssignmentSource) != AccessKindFlags::None) { - access->CountAssignmentsToRegisterCandidate++; + access->CountAssignmentSource++; + + if ((flags & AccessKindFlags::IsAssignmentToRegisterCandidate) != AccessKindFlags::None) + { + access->CountAssignmentsToRegisterCandidate++; + } } - AccessKindFlags assignmentFrom = AccessKindFlags::IsAssignmentDestination | AccessKindFlags::IsAssignmentFromRegisterCandidate; - if ((flags & assignmentFrom) == assignmentFrom) + if ((flags & AccessKindFlags::IsAssignmentDestination) != AccessKindFlags::None) { - access->CountAssignmentsFromRegisterCandidate++; + access->CountAssignmentDestination++; + + if ((flags & AccessKindFlags::IsAssignmentFromRegisterCandidate) != AccessKindFlags::None) + { + access->CountAssignmentsFromRegisterCandidate++; + } } if ((flags & AccessKindFlags::IsCallArg) != AccessKindFlags::None) @@ -361,6 +373,9 @@ class LocalsUses unsigned countOverlappedCalls = 0; unsigned countOverlappedReturns = 0; unsigned countOverlappedRetbufs = 0; + unsigned countOverlappedAssignmentDestination = 0; + unsigned countOverlappedAssignmentSource = 0; + bool overlap = false; for (const Access& otherAccess : m_accesses) { @@ -380,6 +395,8 @@ class LocalsUses countOverlappedCalls += otherAccess.CountCallArgs; countOverlappedReturns += otherAccess.CountReturns; countOverlappedRetbufs += otherAccess.CountPassedAsRetbuf; + countOverlappedAssignmentDestination += otherAccess.CountAssignmentDestination; + countOverlappedAssignmentSource += otherAccess.CountAssignmentSource; } unsigned costWithout = 0; @@ -406,13 +423,15 @@ class LocalsUses } numReadBacks += countOverlappedRetbufs; + numReadBacks += countOverlappedAssignmentDestination; + costWith += numReadBacks * 50; // Write backs with TYP_REFs when the base local is an implicit byref // involves checked write barriers, so they are very expensive. // TODO-CQ: This should be adjusted once we type implicit byrefs as TYP_I_IMPL. unsigned writeBackCost = comp->lvaIsImplicitByRefLocal(lclNum) && (access.AccessType == TYP_REF) ? 150 : 50; - unsigned numWriteBacks = countOverlappedCalls + countOverlappedReturns; + unsigned numWriteBacks = countOverlappedCalls + countOverlappedReturns + countOverlappedAssignmentSource; costWith += numWriteBacks * writeBackCost; JITDUMP("Evaluating access %s @ %03u\n", varTypeName(access.AccessType), access.Offset); @@ -452,13 +471,14 @@ class LocalsUses printf(" %s @ %03u\n", varTypeName(access.AccessType), access.Offset); } - printf(" #: %u\n", access.Count); - printf(" # assignments to reg-candidate: %u\n", access.CountAssignmentsToRegisterCandidate); - printf(" # as call arg: %u\n", access.CountCallArgs); - printf(" # as implicit by-ref call arg: %u\n", access.CountCallArgsByImplicitRef); - printf(" # as on-stack call arg: %u\n", access.CountCallArgsOnStack); - printf(" # as retbuf: %u\n", access.CountPassedAsRetbuf); - printf(" # as returned value: %u\n\n", access.CountReturns); + printf(" #: %u\n", access.Count); + printf(" # assigned from: %u\n", access.CountAssignmentSource); + printf(" # assigned to: %u\n", access.CountAssignmentDestination); + printf(" # as call arg: %u\n", access.CountCallArgs); + printf(" # as implicit by-ref call arg: %u\n", access.CountCallArgsByImplicitRef); + printf(" # as on-stack call arg: %u\n", access.CountCallArgsOnStack); + printf(" # as retbuf: %u\n", access.CountPassedAsRetbuf); + printf(" # as returned value: %u\n\n", access.CountReturns); } } #endif From 9fde2ee02ee523dbe1267854e43957724a46c211 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 16 Mar 2023 11:13:24 +0100 Subject: [PATCH 06/52] Fix register candidate assignment accounting --- src/coreclr/jit/promotion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 2e2742fe2641c..e2ed1ccc95bd6 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -592,7 +592,7 @@ class LocalsUseVisitor : public GenTreeVisitor if (user->gtGetOp2()->OperIs(GT_LCL_VAR)) { LclVarDsc* dsc = m_compiler->lvaGetDesc(user->gtGetOp2()->AsLclVarCommon()); - if (!dsc->IsAddressExposed()) + if ((dsc->TypeGet() != TYP_STRUCT) && !dsc->IsAddressExposed()) { flags |= AccessKindFlags::IsAssignmentFromRegisterCandidate; } @@ -606,7 +606,7 @@ class LocalsUseVisitor : public GenTreeVisitor if (user->gtGetOp1()->OperIs(GT_LCL_VAR)) { LclVarDsc* dsc = m_compiler->lvaGetDesc(user->gtGetOp1()->AsLclVarCommon()); - if (!dsc->IsAddressExposed()) + if ((dsc->TypeGet() != TYP_STRUCT) && !dsc->IsAddressExposed()) { flags |= AccessKindFlags::IsAssignmentToRegisterCandidate; } From a030d9846d1dd29ad7090a9eda19e6ec6c9f4121 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 16 Mar 2023 11:13:53 +0100 Subject: [PATCH 07/52] Add a note in the dump when known conservative copies happen --- src/coreclr/jit/promotion.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index e2ed1ccc95bd6..4bf09e7c3ffb9 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -715,7 +715,7 @@ class ReplaceVisitor : public GenTreeVisitor if (lhsLcl->TypeIs(TYP_STRUCT)) { unsigned size = lhsLcl->GetLayout(m_compiler)->GetSize(); - ReadBackAfter(use, user, lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size); + ReadBackAfter(use, user, lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size, true); } } @@ -875,7 +875,7 @@ class ReplaceVisitor : public GenTreeVisitor } } - void ReadBackAfter(GenTree** use, GenTree* user, unsigned lcl, unsigned offs, unsigned size) + void ReadBackAfter(GenTree** use, GenTree* user, unsigned lcl, unsigned offs, unsigned size, bool conservative = false) { jitstd::vector& replacements = m_replacements[lcl]; size_t index = BinarySearch(replacements, offs); @@ -906,6 +906,12 @@ class ReplaceVisitor : public GenTreeVisitor //use = &comma->gtOp2; //m_madeChanges = true; + + if (conservative) + { + JITDUMP("*** Conservatively marking as read-back\n"); + conservative = false; + } } } }; From dd3b8e3fbd741ef19844784c4e4510c47f53b7a2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 16 Mar 2023 16:05:25 +0100 Subject: [PATCH 08/52] Weighted uses --- src/coreclr/jit/promotion.cpp | 87 +++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 4bf09e7c3ffb9..5cb50394df955 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -42,6 +42,23 @@ struct Access unsigned CountReturns = 0; unsigned CountPassedAsRetbuf = 0; + // Number of times we saw this access. + weight_t CountWtd = 0; + // Number of times this access is on the RHS of an assignment. + weight_t CountAssignmentSourceWtd = 0; + // Number of times this access is on the LHS of an assignment. + weight_t CountAssignmentDestinationWtd = 0; + // Number of times this access is on the RHS of an assignment where the LHS is a probable register candidate. + weight_t CountAssignmentsToRegisterCandidateWtd = 0; + // Number of times this access is on the LHS of an assignment where the RHS is a probable register candidate. + weight_t CountAssignmentsFromRegisterCandidateWtd = 0; + weight_t CountCallArgsWtd = 0; + weight_t CountCallArgsByImplicitRefWtd = 0; + weight_t CountCallArgsOnStackWtd = 0; + weight_t CountReturnsWtd = 0; + weight_t CountPassedAsRetbufWtd = 0; + + Access( unsigned offset, var_types accessType, ClassLayout* layout) : Layout(layout), Offset(offset), AccessType(accessType) @@ -185,7 +202,7 @@ class LocalsUses { } - void RecordAccess(unsigned offs, var_types accessType, ClassLayout* accessLayout, AccessKindFlags flags) + void RecordAccess(unsigned offs, var_types accessType, ClassLayout* accessLayout, AccessKindFlags flags, weight_t weight) { Access* access = nullptr; @@ -228,49 +245,60 @@ class LocalsUses } access->Count++; + access->CountWtd += weight; + if ((flags & AccessKindFlags::IsAssignmentSource) != AccessKindFlags::None) { access->CountAssignmentSource++; + access->CountAssignmentSourceWtd += weight; if ((flags & AccessKindFlags::IsAssignmentToRegisterCandidate) != AccessKindFlags::None) { access->CountAssignmentsToRegisterCandidate++; + access->CountAssignmentsToRegisterCandidateWtd += weight; } } if ((flags & AccessKindFlags::IsAssignmentDestination) != AccessKindFlags::None) { access->CountAssignmentDestination++; + access->CountAssignmentDestinationWtd += weight; if ((flags & AccessKindFlags::IsAssignmentFromRegisterCandidate) != AccessKindFlags::None) { access->CountAssignmentsFromRegisterCandidate++; + access->CountAssignmentsFromRegisterCandidateWtd += weight; } } if ((flags & AccessKindFlags::IsCallArg) != AccessKindFlags::None) { access->CountCallArgs++; + access->CountCallArgsWtd += weight; if ((flags & AccessKindFlags::IsCallArgByImplicitRef) != AccessKindFlags::None) { access->CountCallArgsByImplicitRef++; + access->CountCallArgsByImplicitRefWtd += weight; } if ((flags & AccessKindFlags::IsCallArgOnStack) != AccessKindFlags::None) { access->CountCallArgsOnStack++; + access->CountCallArgsOnStackWtd += weight; } } if ((flags & AccessKindFlags::IsCallRetBuf) != AccessKindFlags::None) { access->CountPassedAsRetbuf++; + access->CountPassedAsRetbufWtd += weight; } if ((flags & AccessKindFlags::IsReturned) != AccessKindFlags::None) { access->CountReturns++; + access->CountReturnsWtd += weight; } } @@ -375,6 +403,11 @@ class LocalsUses unsigned countOverlappedRetbufs = 0; unsigned countOverlappedAssignmentDestination = 0; unsigned countOverlappedAssignmentSource = 0; + weight_t countOverlappedCallsWtd = 0; + weight_t countOverlappedReturnsWtd = 0; + weight_t countOverlappedRetbufsWtd = 0; + weight_t countOverlappedAssignmentDestinationWtd = 0; + weight_t countOverlappedAssignmentSourceWtd = 0; bool overlap = false; for (const Access& otherAccess : m_accesses) @@ -397,49 +430,55 @@ class LocalsUses countOverlappedRetbufs += otherAccess.CountPassedAsRetbuf; countOverlappedAssignmentDestination += otherAccess.CountAssignmentDestination; countOverlappedAssignmentSource += otherAccess.CountAssignmentSource; + + countOverlappedCallsWtd += otherAccess.CountCallArgsWtd; + countOverlappedReturnsWtd += otherAccess.CountReturnsWtd; + countOverlappedRetbufsWtd += otherAccess.CountPassedAsRetbufWtd; + countOverlappedAssignmentDestinationWtd += otherAccess.CountAssignmentDestinationWtd; + countOverlappedAssignmentSourceWtd += otherAccess.CountAssignmentSourceWtd; } - unsigned costWithout = 0; + weight_t costWithout = 0; // A normal access without promotion looks like: // mov reg, [reg+offs] // Which is 5 bytes on x64. - costWithout += access.Count * 50; + costWithout += access.CountWtd * 50; // The register then also needs to be used. In many cases it is containable, however, so we pay a bit less for this. - costWithout += access.Count * 25; + costWithout += access.CountWtd * 25; - unsigned costWith = 0; + weight_t costWith = 0; // For any use we expect to just use the register directly. - costWith += access.Count * 25; + costWith += access.CountWtd * 25; - unsigned numReadBacks = 0; + weight_t countReadBacksWtd = 0; LclVarDsc* lcl = comp->lvaGetDesc(lclNum); // For parameters we need an initial read back if (lcl->lvIsParam) { - numReadBacks++; + countReadBacksWtd += comp->fgFirstBB->getBBWeight(comp); } - numReadBacks += countOverlappedRetbufs; - numReadBacks += countOverlappedAssignmentDestination; + countReadBacksWtd += countOverlappedRetbufsWtd; + countReadBacksWtd += countOverlappedAssignmentDestinationWtd; - costWith += numReadBacks * 50; + costWith += countReadBacksWtd * 50; // Write backs with TYP_REFs when the base local is an implicit byref // involves checked write barriers, so they are very expensive. // TODO-CQ: This should be adjusted once we type implicit byrefs as TYP_I_IMPL. - unsigned writeBackCost = comp->lvaIsImplicitByRefLocal(lclNum) && (access.AccessType == TYP_REF) ? 150 : 50; - unsigned numWriteBacks = countOverlappedCalls + countOverlappedReturns + countOverlappedAssignmentSource; - costWith += numWriteBacks * writeBackCost; + int writeBackCost = comp->lvaIsImplicitByRefLocal(lclNum) && (access.AccessType == TYP_REF) ? 150 : 50; + weight_t countWriteBacksWtd = countOverlappedCallsWtd + countOverlappedReturnsWtd + countOverlappedAssignmentSourceWtd; + costWith += countWriteBacksWtd * writeBackCost; JITDUMP("Evaluating access %s @ %03u\n", varTypeName(access.AccessType), access.Offset); - JITDUMP(" Write-back cost: %u\n", writeBackCost); - JITDUMP(" # write backs: %u\n", numWriteBacks); - JITDUMP(" # read backs: %u\n", numReadBacks); - JITDUMP(" Cost with: %u\n", costWith); - JITDUMP(" Cost without: %u\n", costWithout); + JITDUMP(" Write-back cost: %d\n", writeBackCost); + JITDUMP(" # write backs: " FMT_WT "\n", countWriteBacksWtd); + JITDUMP(" # read backs: " FMT_WT "\n", countReadBacksWtd); + JITDUMP(" Cost with: " FMT_WT "\n", costWith); + JITDUMP(" Cost without: " FMT_WT "\n", costWithout); if (costWith < costWithout) { @@ -488,6 +527,7 @@ class LocalsUseVisitor : public GenTreeVisitor { Promotion* m_prom; LocalsUses* m_uses; + BasicBlock* m_curBB; public: enum { @@ -501,6 +541,11 @@ class LocalsUseVisitor : public GenTreeVisitor new (&m_uses[i], jitstd::placement_t()) LocalsUses(prom->m_compiler); } + void SetBB(BasicBlock* bb) + { + m_curBB = bb; + } + LocalsUses* GetUsesByLocal(unsigned lcl) { return &m_uses[lcl]; } fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) @@ -526,7 +571,7 @@ class LocalsUseVisitor : public GenTreeVisitor var_types accessType = lcl->TypeGet(); ClassLayout* accessLayout = varTypeIsStruct(lcl) ? lcl->GetLayout(m_compiler) : nullptr; AccessKindFlags accessFlags = ClassifyLocalAccess(lcl, user); - m_uses[lcl->GetLclNum()].RecordAccess(offs, accessType, accessLayout, accessFlags); + m_uses[lcl->GetLclNum()].RecordAccess(offs, accessType, accessLayout, accessFlags, m_curBB->getBBWeight(m_compiler)); } } } @@ -933,6 +978,8 @@ PhaseStatus Promotion::Run() LocalsUseVisitor localsUse(this); for (BasicBlock* bb : m_compiler->Blocks()) { + localsUse.SetBB(bb); + for (Statement* stmt : bb->Statements()) { DISPSTMT(stmt); From a693d295f4583799fc493385efca0083897db593 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 17 Mar 2023 08:48:21 +0100 Subject: [PATCH 09/52] Add a fix to enable promotion in customer scenario --- src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/importercalls.cpp | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index e49bb0e8b5450..0c6772b2ae7d5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -8443,7 +8443,7 @@ GenTree* Compiler::gtClone(GenTree* tree, bool complexOK) return nullptr; } - if (tree->gtOper == GT_FIELD) + if (tree->OperIs(GT_FIELD)) { GenTree* objp = nullptr; diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index f9354f947865f..75ab5fa59361c 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2908,8 +2908,16 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // We need to use both index and ptr-to-span twice, so clone or spill. index = impCloneExpr(index, &indexClone, NO_CLASS_HANDLE, CHECK_SPILL_ALL, nullptr DEBUGARG("Span.get_Item index")); - ptrToSpan = impCloneExpr(ptrToSpan, &ptrToSpanClone, NO_CLASS_HANDLE, CHECK_SPILL_ALL, - nullptr DEBUGARG("Span.get_Item ptrToSpan")); + + if (impIsAddressInLocal(ptrToSpan)) + { + ptrToSpanClone = gtCloneExpr(ptrToSpan); + } + else + { + ptrToSpan = impCloneExpr(ptrToSpan, &ptrToSpanClone, NO_CLASS_HANDLE, CHECK_SPILL_ALL, + nullptr DEBUGARG("Span.get_Item ptrToSpan")); + } // Bounds check CORINFO_FIELD_HANDLE lengthHnd = info.compCompHnd->getFieldInClass(clsHnd, 1); From 705e41973138c913f785287b350d65708dfafb45 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 17 Mar 2023 15:28:57 +0100 Subject: [PATCH 10/52] JIT: Generalize handling of commas in block morphing Eliminate commas early in block morphing, before the rest of the phase needs to look at it. Do this by extracting their side effects and adding them on top of the returned result instead. This allows us to handle arbitrary COMMAs on the source and destination operands in a general manner. --- src/coreclr/jit/morphblock.cpp | 246 ++++++++++++++------------------- 1 file changed, 105 insertions(+), 141 deletions(-) diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index a60de6ff2be74..97d0b8faf7d03 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -27,12 +27,10 @@ class MorphInitBlockHelper return "MorphInitBlock"; } - static GenTree* MorphBlock(Compiler* comp, GenTree* tree, bool isDest); - static GenTree* MorphCommaBlock(Compiler* comp, GenTreeOp* firstComma); - private: - void TryInitFieldByField(); - void TryPrimitiveInit(); + void TryInitFieldByField(); + void TryPrimitiveInit(); + GenTree* EliminateCommas(); protected: Compiler* m_comp; @@ -127,6 +125,8 @@ GenTree* MorphInitBlockHelper::Morph() { JITDUMP("%s:\n", GetHelperName()); + GenTree* sideEffects = EliminateCommas(); + PrepareDst(); PrepareSrc(); PropagateBlockAssertions(); @@ -147,12 +147,18 @@ GenTree* MorphInitBlockHelper::Morph() { m_result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; } - if (m_comp->verbose) +#endif + + while (sideEffects != nullptr) { - printf("%s (after):\n", GetHelperName()); - m_comp->gtDispTree(m_result); + m_result = m_comp->gtNewOperNode(GT_COMMA, m_result->TypeGet(), sideEffects, m_result); + INDEBUG(m_result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + + sideEffects = sideEffects->gtNext; } -#endif // DEBUG + + JITDUMP("%s (after):\n", GetHelperName()); + DISPTREE(m_result); return m_result; } @@ -163,12 +169,7 @@ GenTree* MorphInitBlockHelper::Morph() // void MorphInitBlockHelper::PrepareDst() { - GenTree* origDst = m_asg->gtGetOp1(); - m_dst = MorphBlock(m_comp, origDst, true); - if (m_dst != origDst) - { - m_asg->gtOp1 = m_dst; - } + m_dst = m_asg->gtGetOp1(); if (m_asg->TypeGet() != m_dst->TypeGet()) { @@ -213,7 +214,7 @@ void MorphInitBlockHelper::PrepareDst() #if defined(DEBUG) if (m_comp->verbose) { - printf("PrepareDst for [%06u] ", m_comp->dspTreeID(origDst)); + printf("PrepareDst for [%06u] ", m_comp->dspTreeID(m_dst)); if (m_dstLclNode != nullptr) { printf("have found a local var V%02u.\n", m_dstLclNum); @@ -319,125 +320,6 @@ void MorphInitBlockHelper::MorphStructCases() } } -//------------------------------------------------------------------------ -// MorphBlock: Morph a block node preparatory to morphing a block assignment. -// -// Arguments: -// comp - a compiler instance; -// tree - a struct type node; -// isDest - true if this is the destination of an assignment; -// -// Return Value: -// Returns the possibly-morphed node. The caller is responsible for updating -// the parent of this node. -// -// static -GenTree* MorphInitBlockHelper::MorphBlock(Compiler* comp, GenTree* tree, bool isDest) -{ - JITDUMP("MorphBlock for %s tree, before:\n", (isDest ? "dst" : "src")); - DISPTREE(tree); - - assert(varTypeIsStruct(tree)); - - if (tree->OperIs(GT_COMMA)) - { - // TODO-Cleanup: this block is not needed for not struct nodes, but - // TryPrimitiveCopy works wrong without this transformation. - tree = MorphCommaBlock(comp, tree->AsOp()); - if (isDest) - { - tree->SetDoNotCSE(); - } - } - - assert(!tree->OperIsIndir() || varTypeIsI(genActualType(tree->AsIndir()->Addr()))); - - JITDUMP("MorphBlock after:\n"); - DISPTREE(tree); - return tree; -} - -//------------------------------------------------------------------------ -// MorphCommaBlock: transform COMMA(X) as OBJ(COMMA byref(ADDR(X)). -// -// Notes: -// In order to CSE and value number array index expressions and bounds checks, -// the commas in which they are contained need to match. -// The pattern is that the COMMA should be the address expression. -// Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind. -// TODO-1stClassStructs: Consider whether this can be improved. -// Example: -// before: [3] comma struct <- [2] comma struct <- [1] LCL_VAR struct -// after: [5] obj <- [3] comma byref <- [2] comma byref <- [4] addr byref <- [1] LCL_VAR struct -// -// static -GenTree* MorphInitBlockHelper::MorphCommaBlock(Compiler* comp, GenTreeOp* firstComma) -{ - assert(firstComma->OperIs(GT_COMMA)); - - ArrayStack commas(comp->getAllocator(CMK_ArrayStack)); - for (GenTree* currComma = firstComma; currComma != nullptr && currComma->OperIs(GT_COMMA); - currComma = currComma->gtGetOp2()) - { - commas.Push(currComma); - } - - GenTree* lastComma = commas.Top(); - - GenTree* effectiveVal = lastComma->gtGetOp2(); - - if (!effectiveVal->OperIsIndir() && !effectiveVal->IsLocal()) - { - return firstComma; - } - - assert(effectiveVal == firstComma->gtEffectiveVal()); - - GenTree* effectiveValAddr = comp->gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); - - INDEBUG(effectiveValAddr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - - lastComma->AsOp()->gtOp2 = effectiveValAddr; - - while (!commas.Empty()) - { - GenTree* comma = commas.Pop(); - comma->gtType = TYP_BYREF; - - // The "IND(COMMA)" => "COMMA(IND)" transform may have set NO_CSEs on these COMMAs, clear them. - comma->ClearDoNotCSE(); - comp->gtUpdateNodeSideEffects(comma); - } - - const var_types blockType = effectiveVal->TypeGet(); - GenTree* addr = firstComma; - - GenTree* res; - - if (blockType == TYP_STRUCT) - { - CORINFO_CLASS_HANDLE structHnd = comp->gtGetStructHandleIfPresent(effectiveVal); - if (structHnd == NO_CLASS_HANDLE) - { - // TODO-1stClassStructs: get rid of all such cases. - res = comp->gtNewIndir(blockType, addr); - } - else - { - res = comp->gtNewObjNode(structHnd, addr); - comp->gtSetObjGcInfo(res->AsObj()); - } - } - else - { - res = comp->gtNewIndir(blockType, addr); - } - - comp->gtUpdateNodeSideEffects(res); - INDEBUG(res->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return res; -} - //------------------------------------------------------------------------ // InitFieldByField: Attempts to promote a local block init tree to a tree // of promoted field initialization assignments. @@ -665,6 +547,93 @@ void MorphInitBlockHelper::TryPrimitiveInit() } } +//------------------------------------------------------------------------ +// EliminateCommas: Prepare for block morphing by removing commas from the +// destination and source operands of the assignment. +// +// Returns: +// Extracted side effects, in reverse order, linked via the gtNext fields of +// the nodes. +// +// Notes: +// In the general case we have a tree like +// +// +// ASG +// / \. +// COMMA COMMA +// / \ / \. +// A IND C D +// | +// B +// +// +// and we'd like downstream code to just see and be expand ASG(IND(B), D). +// Dealing with A is simple enough. However, if we also need to deal with C, +// then we first need to extract side effects of B. The goal is to then +// produce a final result that looks like: +// +// COMMA +// / \. +// A COMMA +// / \. +// ASG COMMA +// / \ / \. +// tmp B C ASG +// / \. +// IND D +// | +// tmp +// +// Note that the final resulting tree is created in the caller since it also +// needs to propagate side effect flags from the decomposed assignment to all +// the created commas. Therefore this function just returns a linked list of +// the side effects to be used for that purpose. +// +GenTree* MorphInitBlockHelper::EliminateCommas() +{ + GenTree* sideEffects = nullptr; + auto addSideEffect = [&sideEffects](GenTree* sideEff) { + sideEff->gtNext = sideEffects; + sideEffects = sideEff; + }; + + GenTree* lhs = m_asg->gtGetOp1(); + while (lhs->OperIs(GT_COMMA)) + { + addSideEffect(lhs->gtGetOp1()); + lhs = lhs->gtGetOp2(); + } + + assert(lhs->OperIsUnary() || lhs->OperIsLeaf()); + + GenTree* rhs = m_asg->gtGetOp2(); + if (lhs->OperIsUnary() && ((lhs->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) != 0) && rhs->OperIs(GT_COMMA)) + { + GenTree* addr = lhs->gtGetOp1(); + unsigned lhsAddrLclNum = m_comp->lvaGrabTemp(true DEBUGARG("Block morph LHS addr")); + + addSideEffect(m_comp->gtNewTempAssign(lhsAddrLclNum, addr)); + lhs->AsUnOp()->gtOp1 = m_comp->gtNewLclvNode(lhsAddrLclNum, genActualType(addr)); + m_comp->gtUpdateNodeSideEffects(lhs); + } + + while (rhs->OperIs(GT_COMMA)) + { + addSideEffect(rhs->gtGetOp1()); + rhs = rhs->gtGetOp2(); + } + + if (sideEffects != nullptr) + { + m_asg->gtOp1 = lhs; + m_asg->gtOp2 = rhs; + m_comp->gtUpdateNodeSideEffects(m_asg); + } + + return sideEffects; +} + class MorphCopyBlockHelper : public MorphInitBlockHelper { public: @@ -735,12 +704,7 @@ MorphCopyBlockHelper::MorphCopyBlockHelper(Compiler* comp, GenTree* asg) : Morph // void MorphCopyBlockHelper::PrepareSrc() { - GenTree* origSrc = m_asg->gtGetOp2(); - m_src = MorphBlock(m_comp, origSrc, false); - if (m_src != origSrc) - { - m_asg->gtOp2 = m_src; - } + m_src = m_asg->gtGetOp2(); if (m_src->IsLocal()) { From 75c3063df32fe0de001fe6b9aaddde0e93beaabe Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 12:02:44 +0100 Subject: [PATCH 11/52] Write call arg uses before arg instead of call --- src/coreclr/jit/promotion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 5cb50394df955..49ebf1e144c61 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -790,7 +790,7 @@ class ReplaceVisitor : public GenTreeVisitor if (argNodeLcl->TypeIs(TYP_STRUCT)) { unsigned size = argNodeLcl->GetLayout(m_compiler)->GetSize(); - WriteBackBefore(use, argNodeLcl->GetLclNum(), argNodeLcl->GetLclOffs(), size); + WriteBackBefore(&arg.EarlyNodeRef(), argNodeLcl->GetLclNum(), argNodeLcl->GetLclOffs(), size); } } From 47f27f5eb13d19487567e0efecdb6e4aae3fda8d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 12:02:57 +0100 Subject: [PATCH 12/52] Clean up --- src/coreclr/jit/promotion.cpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 49ebf1e144c61..9db7723c3b148 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -760,7 +760,7 @@ class ReplaceVisitor : public GenTreeVisitor if (lhsLcl->TypeIs(TYP_STRUCT)) { unsigned size = lhsLcl->GetLayout(m_compiler)->GetSize(); - ReadBackAfter(use, user, lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size, true); + MarkForReadBack(lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size, true); } } @@ -801,7 +801,7 @@ class ReplaceVisitor : public GenTreeVisitor GenTreeLclVarCommon* retBufLcl = retBufArg->GetNode()->AsLclVarCommon(); unsigned size = m_compiler->typGetObjLayout(call->gtRetClsHnd)->GetSize(); - ReadBackAfter(use, user, retBufLcl->GetLclNum(), retBufLcl->GetLclOffs(), size); + MarkForReadBack(retBufLcl->GetLclNum(), retBufLcl->GetLclOffs(), size); } return fgWalkResult::WALK_CONTINUE; @@ -920,7 +920,7 @@ class ReplaceVisitor : public GenTreeVisitor } } - void ReadBackAfter(GenTree** use, GenTree* user, unsigned lcl, unsigned offs, unsigned size, bool conservative = false) + void MarkForReadBack(unsigned lcl, unsigned offs, unsigned size, bool conservative = false) { jitstd::vector& replacements = m_replacements[lcl]; size_t index = BinarySearch(replacements, offs); @@ -943,18 +943,9 @@ class ReplaceVisitor : public GenTreeVisitor rep.NeedsWriteBack = false; index++; - //GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); - //GenTree* src = m_compiler->gtNewLclFldNode(lcl, rep.AccessType, rep.Offset); - - //GenTreeOp* comma = m_compiler->gtNewOperNode(GT_COMMA, TYP_VOID, *use, m_compiler->gtNewAssignNode(dst, src)); - //*use = comma; - //use = &comma->gtOp2; - - //m_madeChanges = true; - if (conservative) { - JITDUMP("*** Conservatively marking as read-back\n"); + JITDUMP("*** NYI: Conservatively marking as read-back\n"); conservative = false; } } From bcc1dd3e44dcff0e3104a1fa461ff526e55eaa7e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 12:03:17 +0100 Subject: [PATCH 13/52] Change types of comma args properly in call arg morphing --- src/coreclr/jit/morph.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 5d49fd6d575e5..b4c81573e0446 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3320,7 +3320,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) if (argLclNum != BAD_VAR_NUM) { - argObj->ChangeType(argVarDsc->TypeGet()); + argx->ChangeType(argVarDsc->TypeGet()); argObj->SetOper(GT_LCL_VAR); argObj->AsLclVar()->SetLclNum(argLclNum); } @@ -3338,7 +3338,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) { // TODO-CQ: perform this transformation in lowering instead of here and // avoid marking enregisterable structs DNER. - argObj->ChangeType(structBaseType); + argx->ChangeType(structBaseType); if (argObj->OperIs(GT_LCL_VAR)) { argObj->SetOper(GT_LCL_FLD); From 724a582fa3da3df09ea329aad16a9a0ab4795afe Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 12:05:57 +0100 Subject: [PATCH 14/52] Fix write-back of local on RHS of assignment --- src/coreclr/jit/promotion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 9db7723c3b148..f21abea049cd1 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -750,7 +750,7 @@ class ReplaceVisitor : public GenTreeVisitor if (rhsLcl->TypeIs(TYP_STRUCT)) { unsigned size = rhsLcl->GetLayout(m_compiler)->GetSize(); - WriteBackBefore(use, rhsLcl->GetLclNum(), rhsLcl->GetLclOffs(), size); + WriteBackBefore(&asg->gtOp2, rhsLcl->GetLclNum(), rhsLcl->GetLclOffs(), size); } } From 4d3f4ba4720dbe9e9664d1ebefef5fa885c95b90 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 12:58:35 +0100 Subject: [PATCH 15/52] Run jit-format --- src/coreclr/jit/importercalls.cpp | 2 +- src/coreclr/jit/loopcloning.h | 2 +- src/coreclr/jit/morphblock.cpp | 3 +- src/coreclr/jit/promotion.cpp | 260 ++++++++++++++++-------------- src/coreclr/jit/promotion.h | 1 + 5 files changed, 145 insertions(+), 123 deletions(-) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 75ab5fa59361c..d6f227ec4a890 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2916,7 +2916,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, else { ptrToSpan = impCloneExpr(ptrToSpan, &ptrToSpanClone, NO_CLASS_HANDLE, CHECK_SPILL_ALL, - nullptr DEBUGARG("Span.get_Item ptrToSpan")); + nullptr DEBUGARG("Span.get_Item ptrToSpan")); } // Bounds check diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index cf8ceb0dce9c9..9ab83531d0f52 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -885,7 +885,7 @@ struct LoopCloneContext // Ensure that the block condition is present, if not allocate space. JitExpandArrayStack*>* EnsureBlockConditions(unsigned loopNum, - unsigned totalBlocks); + unsigned totalBlocks); #ifdef DEBUG // Print the block conditions for the loop. diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index 97d0b8faf7d03..3c1ca5a31157d 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -742,7 +742,8 @@ void MorphCopyBlockHelper::TrySpecialCases() m_transformationDecision = BlockTransformation::SkipMultiRegSrc; m_result = m_asg; } - else if (m_src->gtEffectiveVal()->IsCall() && m_dst->OperIs(GT_LCL_VAR) && m_dstVarDsc->CanBeReplacedWithItsField(m_comp)) + else if (m_src->gtEffectiveVal()->IsCall() && m_dst->OperIs(GT_LCL_VAR) && + m_dstVarDsc->CanBeReplacedWithItsField(m_comp)) { JITDUMP("Not morphing a single reg call return\n"); m_transformationDecision = BlockTransformation::SkipSingleRegCallSrc; diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index f21abea049cd1..5f01d27194c2e 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -23,8 +23,8 @@ PhaseStatus Compiler::PromoteStructsNew() struct Access { ClassLayout* Layout; - unsigned Offset; - var_types AccessType; + unsigned Offset; + var_types AccessType; // Number of times we saw this access. unsigned Count = 0; @@ -36,11 +36,11 @@ struct Access unsigned CountAssignmentsToRegisterCandidate = 0; // Number of times this access is on the LHS of an assignment where the RHS is a probable register candidate. unsigned CountAssignmentsFromRegisterCandidate = 0; - unsigned CountCallArgs = 0; - unsigned CountCallArgsByImplicitRef = 0; - unsigned CountCallArgsOnStack = 0; - unsigned CountReturns = 0; - unsigned CountPassedAsRetbuf = 0; + unsigned CountCallArgs = 0; + unsigned CountCallArgsByImplicitRef = 0; + unsigned CountCallArgsOnStack = 0; + unsigned CountReturns = 0; + unsigned CountPassedAsRetbuf = 0; // Number of times we saw this access. weight_t CountWtd = 0; @@ -52,15 +52,13 @@ struct Access weight_t CountAssignmentsToRegisterCandidateWtd = 0; // Number of times this access is on the LHS of an assignment where the RHS is a probable register candidate. weight_t CountAssignmentsFromRegisterCandidateWtd = 0; - weight_t CountCallArgsWtd = 0; - weight_t CountCallArgsByImplicitRefWtd = 0; - weight_t CountCallArgsOnStackWtd = 0; - weight_t CountReturnsWtd = 0; - weight_t CountPassedAsRetbufWtd = 0; + weight_t CountCallArgsWtd = 0; + weight_t CountCallArgsByImplicitRefWtd = 0; + weight_t CountCallArgsOnStackWtd = 0; + weight_t CountReturnsWtd = 0; + weight_t CountPassedAsRetbufWtd = 0; - - Access( - unsigned offset, var_types accessType, ClassLayout* layout) + Access(unsigned offset, var_types accessType, ClassLayout* layout) : Layout(layout), Offset(offset), AccessType(accessType) { } @@ -86,12 +84,11 @@ struct Access return true; } - }; // Find first entry with an equal offset, or bitwise complement of first // entry with a higher offset. -template +template static size_t BinarySearch(const jitstd::vector& vec, unsigned offset) { size_t min = 0; @@ -123,13 +120,13 @@ static size_t BinarySearch(const jitstd::vector& vec, unsigned offset) struct Replacement { - unsigned Offset; - var_types AccessType; - unsigned LclNum; + unsigned Offset; + var_types AccessType; + unsigned LclNum; ClassLayout* Layout; - bool NeedsWriteBack = true; - bool NeedsReadBack = false; - Replacement* Next = nullptr; + bool NeedsWriteBack = true; + bool NeedsReadBack = false; + Replacement* Next = nullptr; Replacement(unsigned offset, var_types accessType, unsigned lclNum, ClassLayout* layout) : Offset(offset), AccessType(accessType), LclNum(lclNum), Layout(layout) @@ -156,16 +153,16 @@ struct Replacement enum class AccessKindFlags : uint32_t { - None = 0, - IsCallArg = 1, - IsAssignmentSource = 2, - IsAssignmentDestination = 4, - IsAssignmentToRegisterCandidate = 8, + None = 0, + IsCallArg = 1, + IsAssignmentSource = 2, + IsAssignmentDestination = 4, + IsAssignmentToRegisterCandidate = 8, IsAssignmentFromRegisterCandidate = 16, - IsCallArgByImplicitRef = 32, - IsCallArgOnStack = 64, - IsCallRetBuf = 128, - IsReturned = 256, + IsCallArgByImplicitRef = 32, + IsCallArgOnStack = 64, + IsCallRetBuf = 128, + IsReturned = 256, }; inline constexpr AccessKindFlags operator~(AccessKindFlags a) @@ -202,7 +199,8 @@ class LocalsUses { } - void RecordAccess(unsigned offs, var_types accessType, ClassLayout* accessLayout, AccessKindFlags flags, weight_t weight) + void RecordAccess( + unsigned offs, var_types accessType, ClassLayout* accessLayout, AccessKindFlags flags, weight_t weight) { Access* access = nullptr; @@ -217,14 +215,14 @@ class LocalsUses Access& candidateAccess = m_accesses[index]; if (candidateAccess.AccessType == accessType) { - // Some operations on SIMD types do not require a layout, but those can be merged with ones that do. - bool isMergeableLayout = - (candidateAccess.Layout == accessLayout) || - (varTypeIsSIMD(accessType) && (candidateAccess.Layout == nullptr)); + // Some operations on SIMD types do not require a layout, but those can be merged with ones that + // do. + bool isMergeableLayout = (candidateAccess.Layout == accessLayout) || + (varTypeIsSIMD(accessType) && (candidateAccess.Layout == nullptr)); if (isMergeableLayout) { - access = &candidateAccess; + access = &candidateAccess; access->Layout = accessLayout; break; } @@ -309,7 +307,7 @@ class LocalsUses return; } - //jitstd::sort( + // jitstd::sort( // m_accesses.begin(), m_accesses.end(), // [](const Access& l, const Access& r) // { @@ -377,9 +375,10 @@ class LocalsUses #ifdef DEBUG char buf[32]; - sprintf_s(buf, sizeof(buf), "V%02u.[%03u..%03u)", lclNum, access.Offset, access.Offset + genTypeSize(access.AccessType)); + sprintf_s(buf, sizeof(buf), "V%02u.[%03u..%03u)", lclNum, access.Offset, + access.Offset + genTypeSize(access.AccessType)); size_t len = strlen(buf) + 1; - char* bufp = new (comp, CMK_DebugOnly) char[len]; + char* bufp = new (comp, CMK_DebugOnly) char[len]; strcpy_s(bufp, len, buf); #endif unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); @@ -398,16 +397,16 @@ class LocalsUses bool EvaluateReplacement(Compiler* comp, unsigned lclNum, const Access& access) { - unsigned countOverlappedCalls = 0; - unsigned countOverlappedReturns = 0; - unsigned countOverlappedRetbufs = 0; - unsigned countOverlappedAssignmentDestination = 0; - unsigned countOverlappedAssignmentSource = 0; - weight_t countOverlappedCallsWtd = 0; - weight_t countOverlappedReturnsWtd = 0; - weight_t countOverlappedRetbufsWtd = 0; + unsigned countOverlappedCalls = 0; + unsigned countOverlappedReturns = 0; + unsigned countOverlappedRetbufs = 0; + unsigned countOverlappedAssignmentDestination = 0; + unsigned countOverlappedAssignmentSource = 0; + weight_t countOverlappedCallsWtd = 0; + weight_t countOverlappedReturnsWtd = 0; + weight_t countOverlappedRetbufsWtd = 0; weight_t countOverlappedAssignmentDestinationWtd = 0; - weight_t countOverlappedAssignmentSourceWtd = 0; + weight_t countOverlappedAssignmentSourceWtd = 0; bool overlap = false; for (const Access& otherAccess : m_accesses) @@ -445,7 +444,8 @@ class LocalsUses costWithout += access.CountWtd * 50; - // The register then also needs to be used. In many cases it is containable, however, so we pay a bit less for this. + // The register then also needs to be used. In many cases it is containable, however, so we pay a bit less for + // this. costWithout += access.CountWtd * 25; weight_t costWith = 0; @@ -453,8 +453,8 @@ class LocalsUses // For any use we expect to just use the register directly. costWith += access.CountWtd * 25; - weight_t countReadBacksWtd = 0; - LclVarDsc* lcl = comp->lvaGetDesc(lclNum); + weight_t countReadBacksWtd = 0; + LclVarDsc* lcl = comp->lvaGetDesc(lclNum); // For parameters we need an initial read back if (lcl->lvIsParam) { @@ -469,8 +469,9 @@ class LocalsUses // Write backs with TYP_REFs when the base local is an implicit byref // involves checked write barriers, so they are very expensive. // TODO-CQ: This should be adjusted once we type implicit byrefs as TYP_I_IMPL. - int writeBackCost = comp->lvaIsImplicitByRefLocal(lclNum) && (access.AccessType == TYP_REF) ? 150 : 50; - weight_t countWriteBacksWtd = countOverlappedCallsWtd + countOverlappedReturnsWtd + countOverlappedAssignmentSourceWtd; + int writeBackCost = comp->lvaIsImplicitByRefLocal(lclNum) && (access.AccessType == TYP_REF) ? 150 : 50; + weight_t countWriteBacksWtd = + countOverlappedCallsWtd + countOverlappedReturnsWtd + countOverlappedAssignmentSourceWtd; costWith += countWriteBacksWtd * writeBackCost; JITDUMP("Evaluating access %s @ %03u\n", varTypeName(access.AccessType), access.Offset); @@ -525,9 +526,10 @@ class LocalsUses class LocalsUseVisitor : public GenTreeVisitor { - Promotion* m_prom; + Promotion* m_prom; LocalsUses* m_uses; BasicBlock* m_curBB; + public: enum { @@ -536,7 +538,8 @@ class LocalsUseVisitor : public GenTreeVisitor LocalsUseVisitor(Promotion* prom) : GenTreeVisitor(prom->m_compiler), m_prom(prom) { - m_uses = reinterpret_cast(new (prom->m_compiler, CMK_Promotion) char[prom->m_compiler->lvaCount * sizeof(LocalsUses)]); + m_uses = reinterpret_cast( + new (prom->m_compiler, CMK_Promotion) char[prom->m_compiler->lvaCount * sizeof(LocalsUses)]); for (size_t i = 0; i < prom->m_compiler->lvaCount; i++) new (&m_uses[i], jitstd::placement_t()) LocalsUses(prom->m_compiler); } @@ -546,7 +549,10 @@ class LocalsUseVisitor : public GenTreeVisitor m_curBB = bb; } - LocalsUses* GetUsesByLocal(unsigned lcl) { return &m_uses[lcl]; } + LocalsUses* GetUsesByLocal(unsigned lcl) + { + return &m_uses[lcl]; + } fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) { @@ -555,23 +561,25 @@ class LocalsUseVisitor : public GenTreeVisitor if (tree->OperIsLocal()) { GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); - LclVarDsc* dsc = m_compiler->lvaGetDesc(lcl); + LclVarDsc* dsc = m_compiler->lvaGetDesc(lcl); if (!dsc->lvPromoted && (dsc->TypeGet() == TYP_STRUCT) && !dsc->IsAddressExposed()) { if (lcl->OperIsLocalAddr()) { - assert(user->OperIs(GT_CALL) && dsc->IsHiddenBufferStructArg() && (user->AsCall()->gtArgs.GetRetBufferArg()->GetNode() == lcl)); + assert(user->OperIs(GT_CALL) && dsc->IsHiddenBufferStructArg() && + (user->AsCall()->gtArgs.GetRetBufferArg()->GetNode() == lcl)); // TODO: We should record that this is used as the address // of a retbuf -- it makes promotion less desirable as we // have to reload fields back from the retbuf. } else { - unsigned offs = lcl->GetLclOffs(); - var_types accessType = lcl->TypeGet(); - ClassLayout* accessLayout = varTypeIsStruct(lcl) ? lcl->GetLayout(m_compiler) : nullptr; - AccessKindFlags accessFlags = ClassifyLocalAccess(lcl, user); - m_uses[lcl->GetLclNum()].RecordAccess(offs, accessType, accessLayout, accessFlags, m_curBB->getBBWeight(m_compiler)); + unsigned offs = lcl->GetLclOffs(); + var_types accessType = lcl->TypeGet(); + ClassLayout* accessLayout = varTypeIsStruct(lcl) ? lcl->GetLayout(m_compiler) : nullptr; + AccessKindFlags accessFlags = ClassifyLocalAccess(lcl, user); + m_uses[lcl->GetLclNum()].RecordAccess(offs, accessType, accessLayout, accessFlags, + m_curBB->getBBWeight(m_compiler)); } } } @@ -671,36 +679,43 @@ class LocalsUseVisitor : public GenTreeVisitor class ReplaceVisitor : public GenTreeVisitor { - Promotion* m_prom; + Promotion* m_prom; jitstd::vector* m_replacements; - BasicBlock* m_bb; - Statement* m_stmt; - unsigned m_seenAsgs; - unsigned m_seenAsgToLcl; - bool m_madeChanges; - Statement* m_lastStmt; + BasicBlock* m_bb; + Statement* m_stmt; + unsigned m_seenAsgs; + unsigned m_seenAsgToLcl; + bool m_madeChanges; + Statement* m_lastStmt; + public: enum { - DoPostOrder = true, + DoPostOrder = true, UseExecutionOrder = true, }; - ReplaceVisitor(Promotion* prom, jitstd::vector* replacements) : - GenTreeVisitor(prom->m_compiler), m_prom(prom), m_replacements(replacements) + ReplaceVisitor(Promotion* prom, jitstd::vector* replacements) + : GenTreeVisitor(prom->m_compiler), m_prom(prom), m_replacements(replacements) { } - bool MadeChanges() { return m_madeChanges; } - Statement* GetNextStatement() { return m_lastStmt->GetNextStmt(); } + bool MadeChanges() + { + return m_madeChanges; + } + Statement* GetNextStatement() + { + return m_lastStmt->GetNextStmt(); + } void Reset(BasicBlock* bb, Statement* stmt) { - m_bb = bb; - m_stmt = stmt; - m_seenAsgs = 0; + m_bb = bb; + m_stmt = stmt; + m_seenAsgs = 0; m_madeChanges = false; - m_lastStmt = stmt; + m_lastStmt = stmt; } fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) @@ -799,7 +814,7 @@ class ReplaceVisitor : public GenTreeVisitor assert(retBufArg != nullptr); assert(retBufArg->GetNode()->OperIsLocalAddr()); GenTreeLclVarCommon* retBufLcl = retBufArg->GetNode()->AsLclVarCommon(); - unsigned size = m_compiler->typGetObjLayout(call->gtRetClsHnd)->GetSize(); + unsigned size = m_compiler->typGetObjLayout(call->gtRetClsHnd)->GetSize(); MarkForReadBack(retBufLcl->GetLclNum(), retBufLcl->GetLclOffs(), size); } @@ -809,8 +824,8 @@ class ReplaceVisitor : public GenTreeVisitor void ReplaceLocal(GenTree** use, GenTree* user) { - GenTreeLclVarCommon* lcl = (*use)->AsLclVarCommon(); - unsigned lclNum = lcl->GetLclNum(); + GenTreeLclVarCommon* lcl = (*use)->AsLclVarCommon(); + unsigned lclNum = lcl->GetLclNum(); jitstd::vector& replacements = m_replacements[lclNum]; if (replacements.size() <= 0) @@ -818,7 +833,7 @@ class ReplaceVisitor : public GenTreeVisitor return; } - unsigned offs = lcl->GetLclOffs(); + unsigned offs = lcl->GetLclOffs(); var_types accessType = lcl->TypeGet(); #ifdef DEBUG @@ -829,14 +844,15 @@ class ReplaceVisitor : public GenTreeVisitor else { ClassLayout* accessLayout = varTypeIsStruct(accessType) ? lcl->GetLayout(m_compiler) : nullptr; - unsigned accessSize = accessLayout != nullptr ? accessLayout->GetSize() : genTypeSize(accessType); + unsigned accessSize = accessLayout != nullptr ? accessLayout->GetSize() : genTypeSize(accessType); for (const Replacement& rep : replacements) { assert(!rep.Overlaps(offs, accessSize) || ((rep.Offset == offs) && (rep.AccessType == accessType))); } assert((accessType != TYP_STRUCT) || (accessLayout != nullptr)); - JITDUMP("Processing use [%06u] of V%02u.[%03u..%03u)\n", Compiler::dspTreeID(lcl), lclNum, offs, offs + accessSize); + JITDUMP("Processing use [%06u] of V%02u.[%03u..%03u)\n", Compiler::dspTreeID(lcl), lclNum, offs, + offs + accessSize); } #endif @@ -853,13 +869,14 @@ class ReplaceVisitor : public GenTreeVisitor if ((lcl->gtFlags & GTF_VAR_DEF) != 0) { rep.NeedsWriteBack = true; - rep.NeedsReadBack = false; + rep.NeedsReadBack = false; } else if (rep.NeedsReadBack) { GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); - *use = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), m_compiler->gtNewAssignNode(dst, src), *use); + *use = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), m_compiler->gtNewAssignNode(dst, src), + *use); rep.NeedsReadBack = false; } @@ -889,7 +906,7 @@ class ReplaceVisitor : public GenTreeVisitor void WriteBackBefore(GenTree** use, unsigned lcl, unsigned offs, unsigned size) { jitstd::vector replacements = m_replacements[lcl]; - size_t index = BinarySearch(replacements, offs); + size_t index = BinarySearch(replacements, offs); if ((ssize_t)index < 0) { @@ -906,14 +923,15 @@ class ReplaceVisitor : public GenTreeVisitor Replacement& rep = replacements[index]; if (rep.NeedsWriteBack) { - GenTree* dst = m_compiler->gtNewLclFldNode(lcl, rep.AccessType, rep.Offset); - GenTree* src = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); - GenTreeOp* comma = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), m_compiler->gtNewAssignNode(dst, src), *use); + GenTree* dst = m_compiler->gtNewLclFldNode(lcl, rep.AccessType, rep.Offset); + GenTree* src = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTreeOp* comma = + m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), m_compiler->gtNewAssignNode(dst, src), *use); *use = comma; - use = &comma->gtOp2; + use = &comma->gtOp2; rep.NeedsWriteBack = false; - m_madeChanges = true; + m_madeChanges = true; } index++; @@ -923,7 +941,7 @@ class ReplaceVisitor : public GenTreeVisitor void MarkForReadBack(unsigned lcl, unsigned offs, unsigned size, bool conservative = false) { jitstd::vector& replacements = m_replacements[lcl]; - size_t index = BinarySearch(replacements, offs); + size_t index = BinarySearch(replacements, offs); if ((ssize_t)index < 0) { @@ -939,7 +957,7 @@ class ReplaceVisitor : public GenTreeVisitor { Replacement& rep = replacements[index]; assert((rep.Offset >= offs) && (rep.Offset + genTypeSize(rep.AccessType) <= end)); - rep.NeedsReadBack = true; + rep.NeedsReadBack = true; rep.NeedsWriteBack = false; index++; @@ -960,10 +978,10 @@ PhaseStatus Promotion::Run() m_compiler->fgDispBasicBlocks(true); } - //if (!ISMETHOD("NPTest")) - //{ - // return PhaseStatus::MODIFIED_NOTHING; - //} +// if (!ISMETHOD("NPTest")) +//{ +// return PhaseStatus::MODIFIED_NOTHING; +//} #endif LocalsUseVisitor localsUse(this); @@ -991,11 +1009,13 @@ PhaseStatus Promotion::Run() } #endif - bool anyReplacements = false; - jitstd::vector* replacements = reinterpret_cast*>(new (m_compiler, CMK_Promotion) char[sizeof(jitstd::vector) * m_compiler->lvaCount]); + bool anyReplacements = false; + jitstd::vector* replacements = reinterpret_cast*>( + new (m_compiler, CMK_Promotion) char[sizeof(jitstd::vector) * m_compiler->lvaCount]); for (unsigned i = 0; i < numLocals; i++) { - new (&replacements[i], jitstd::placement_t()) jitstd::vector(m_compiler->getAllocator(CMK_Promotion)); + new (&replacements[i], jitstd::placement_t()) + jitstd::vector(m_compiler->getAllocator(CMK_Promotion)); localsUse.GetUsesByLocal(i)->PickPromotions(m_compiler, i, replacements[i]); if (replacements[i].size() > 0) @@ -1003,7 +1023,8 @@ PhaseStatus Promotion::Run() JITDUMP("V%02u promoted with %d replacements\n", i, (int)replacements[i].size()); for (const Replacement& rep : replacements[i]) { - JITDUMP(" [%03u..%03u) promoted as %s V%02u\n", rep.Offset, rep.Offset + genTypeSize(rep.AccessType), varTypeName(rep.AccessType), rep.LclNum); + JITDUMP(" [%03u..%03u) promoted as %s V%02u\n", rep.Offset, rep.Offset + genTypeSize(rep.AccessType), + varTypeName(rep.AccessType), rep.LclNum); anyReplacements = true; } } @@ -1041,11 +1062,8 @@ PhaseStatus Promotion::Run() assert(!rep.NeedsReadBack || !rep.NeedsWriteBack); if (rep.NeedsReadBack) { - JITDUMP("Reading back replacement V%02u.[%03u..%03u) -> V%02u at the end of " FMT_BB "\n", - i, - rep.Offset, rep.Offset + genTypeSize(rep.AccessType), - rep.LclNum, - bb->bbNum); + JITDUMP("Reading back replacement V%02u.[%03u..%03u) -> V%02u at the end of " FMT_BB "\n", i, + rep.Offset, rep.Offset + genTypeSize(rep.AccessType), rep.LclNum, bb->bbNum); GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); GenTree* src = m_compiler->gtNewLclFldNode(i, rep.AccessType, rep.Offset); @@ -1068,9 +1086,9 @@ PhaseStatus Promotion::Run() m_compiler->fgEnsureFirstBBisScratch(); - GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); - GenTree* src = m_compiler->gtNewLclFldNode(i, rep.AccessType, rep.Offset); - GenTree* asg = m_compiler->gtNewAssignNode(dst, src); + GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTree* src = m_compiler->gtNewLclFldNode(i, rep.AccessType, rep.Offset); + GenTree* asg = m_compiler->gtNewAssignNode(dst, src); Statement* stmt = m_compiler->fgNewStmtFromTree(asg); if (prevParamLoadStmt != nullptr) { @@ -1088,10 +1106,11 @@ PhaseStatus Promotion::Run() return PhaseStatus::MODIFIED_EVERYTHING; } -bool Promotion::ParseLocation(GenTree* tree, unsigned* lcl, unsigned* offs, var_types* accessType, ClassLayout** accessLayout) +bool Promotion::ParseLocation( + GenTree* tree, unsigned* lcl, unsigned* offs, var_types* accessType, ClassLayout** accessLayout) { - *offs = 0; - *accessType = TYP_UNDEF; + *offs = 0; + *accessType = TYP_UNDEF; *accessLayout = nullptr; while (true) @@ -1102,7 +1121,7 @@ bool Promotion::ParseLocation(GenTree* tree, unsigned* lcl, unsigned* offs, var_ *offs += tree->AsLclVarCommon()->GetLclOffs(); if (*accessType == TYP_UNDEF) { - *accessType = tree->TypeGet(); + *accessType = tree->TypeGet(); *accessLayout = varTypeIsStruct(tree) ? tree->AsLclVarCommon()->GetLayout(m_compiler) : nullptr; } @@ -1121,10 +1140,11 @@ bool Promotion::ParseLocation(GenTree* tree, unsigned* lcl, unsigned* offs, var_ if (*accessType == TYP_UNDEF) { CORINFO_CLASS_HANDLE fieldClassHandle; - var_types fieldType = m_compiler->eeGetFieldType(tree->AsField()->gtFldHnd, &fieldClassHandle); + var_types fieldType = m_compiler->eeGetFieldType(tree->AsField()->gtFldHnd, &fieldClassHandle); *accessType = fieldType == TYP_STRUCT ? m_compiler->impNormStructType(fieldClassHandle) : fieldType; - *accessLayout = varTypeIsStruct(fieldType) ? m_compiler->typGetObjLayout(fieldClassHandle) : nullptr; + *accessLayout = + varTypeIsStruct(fieldType) ? m_compiler->typGetObjLayout(fieldClassHandle) : nullptr; } *offs += tree->AsField()->gtFldOffset; @@ -1134,7 +1154,7 @@ bool Promotion::ParseLocation(GenTree* tree, unsigned* lcl, unsigned* offs, var_ { if (*accessType == TYP_UNDEF) { - *accessType = tree->TypeGet(); + *accessType = tree->TypeGet(); *accessLayout = tree->OperIsBlk() ? tree->AsBlk()->GetLayout() : nullptr; } diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index 7d6fbd78b3f61..512abb2b1429a 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -14,6 +14,7 @@ class Promotion friend class ReplaceVisitor; bool ParseLocation(GenTree* tree, unsigned* lcl, unsigned* offs, var_types* accessType, ClassLayout** accessLayout); + public: explicit Promotion(Compiler* compiler) : m_compiler(compiler) { From 14113a91ae8ade5af8df9bb01f2fdb2d0c0e8994 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 13:56:19 +0100 Subject: [PATCH 16/52] Remove some layouts, change some costing, clean up --- src/coreclr/jit/promotion.cpp | 94 ++++++++++------------------------- 1 file changed, 25 insertions(+), 69 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 5f01d27194c2e..ec3bd2656b692 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -42,15 +42,10 @@ struct Access unsigned CountReturns = 0; unsigned CountPassedAsRetbuf = 0; - // Number of times we saw this access. weight_t CountWtd = 0; - // Number of times this access is on the RHS of an assignment. weight_t CountAssignmentSourceWtd = 0; - // Number of times this access is on the LHS of an assignment. weight_t CountAssignmentDestinationWtd = 0; - // Number of times this access is on the RHS of an assignment where the LHS is a probable register candidate. weight_t CountAssignmentsToRegisterCandidateWtd = 0; - // Number of times this access is on the LHS of an assignment where the RHS is a probable register candidate. weight_t CountAssignmentsFromRegisterCandidateWtd = 0; weight_t CountCallArgsWtd = 0; weight_t CountCallArgsByImplicitRefWtd = 0; @@ -123,13 +118,12 @@ struct Replacement unsigned Offset; var_types AccessType; unsigned LclNum; - ClassLayout* Layout; bool NeedsWriteBack = true; bool NeedsReadBack = false; Replacement* Next = nullptr; - Replacement(unsigned offset, var_types accessType, unsigned lclNum, ClassLayout* layout) - : Offset(offset), AccessType(accessType), LclNum(lclNum), Layout(layout) + Replacement(unsigned offset, var_types accessType, unsigned lclNum) + : Offset(offset), AccessType(accessType), LclNum(lclNum) { } @@ -213,19 +207,10 @@ class LocalsUses do { Access& candidateAccess = m_accesses[index]; - if (candidateAccess.AccessType == accessType) + if ((candidateAccess.AccessType == accessType) && (candidateAccess.Layout == accessLayout)) { - // Some operations on SIMD types do not require a layout, but those can be merged with ones that - // do. - bool isMergeableLayout = (candidateAccess.Layout == accessLayout) || - (varTypeIsSIMD(accessType) && (candidateAccess.Layout == nullptr)); - - if (isMergeableLayout) - { - access = &candidateAccess; - access->Layout = accessLayout; - break; - } + access = &candidateAccess; + break; } index++; @@ -361,13 +346,6 @@ class LocalsUses continue; } - if (varTypeIsSIMD(access.AccessType) && (access.Layout == nullptr)) - { - // We need a layout to create a local. - // TODO: Use canonical layout? - continue; - } - if (!EvaluateReplacement(comp, lclNum, access)) { continue; @@ -382,16 +360,8 @@ class LocalsUses strcpy_s(bufp, len, buf); #endif unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); - if (varTypeIsStruct(access.AccessType)) - { - comp->lvaSetStruct(newLcl, access.Layout->GetClassHandle(), false); - } - else - { - comp->lvaGetDesc(newLcl)->lvType = access.AccessType; - } - - replacements.push_back(Replacement(access.Offset, access.AccessType, newLcl, access.Layout)); + comp->lvaGetDesc(newLcl)->lvType = access.AccessType; + replacements.push_back(Replacement(access.Offset, access.AccessType, newLcl)); } } @@ -440,18 +410,16 @@ class LocalsUses weight_t costWithout = 0; // A normal access without promotion looks like: // mov reg, [reg+offs] - // Which is 5 bytes on x64. + // It may also be contained. Overall we are going to cost each use of + // an unpromoted local at 6.5 bytes. + // TODO: We can make much better guesses on what will and won't be contained. - costWithout += access.CountWtd * 50; - - // The register then also needs to be used. In many cases it is containable, however, so we pay a bit less for - // this. - costWithout += access.CountWtd * 25; + costWithout += access.CountWtd * 6.5; weight_t costWith = 0; - // For any use we expect to just use the register directly. - costWith += access.CountWtd * 25; + // For any use we expect to just use the register directly. We will cost this at 3.5 bytes. + costWith += access.CountWtd * 3.5; weight_t countReadBacksWtd = 0; LclVarDsc* lcl = comp->lvaGetDesc(lclNum); @@ -464,12 +432,13 @@ class LocalsUses countReadBacksWtd += countOverlappedRetbufsWtd; countReadBacksWtd += countOverlappedAssignmentDestinationWtd; - costWith += countReadBacksWtd * 50; + // A read back puts the value from stack back to register. We cost it at 5 bytes. + costWith += countReadBacksWtd * 5; // Write backs with TYP_REFs when the base local is an implicit byref // involves checked write barriers, so they are very expensive. // TODO-CQ: This should be adjusted once we type implicit byrefs as TYP_I_IMPL. - int writeBackCost = comp->lvaIsImplicitByRefLocal(lclNum) && (access.AccessType == TYP_REF) ? 150 : 50; + weight_t writeBackCost = comp->lvaIsImplicitByRefLocal(lclNum) && (access.AccessType == TYP_REF) ? 15 : 5; weight_t countWriteBacksWtd = countOverlappedCallsWtd + countOverlappedReturnsWtd + countOverlappedAssignmentSourceWtd; costWith += countWriteBacksWtd * writeBackCost; @@ -511,14 +480,14 @@ class LocalsUses printf(" %s @ %03u\n", varTypeName(access.AccessType), access.Offset); } - printf(" #: %u\n", access.Count); - printf(" # assigned from: %u\n", access.CountAssignmentSource); - printf(" # assigned to: %u\n", access.CountAssignmentDestination); - printf(" # as call arg: %u\n", access.CountCallArgs); - printf(" # as implicit by-ref call arg: %u\n", access.CountCallArgsByImplicitRef); - printf(" # as on-stack call arg: %u\n", access.CountCallArgsOnStack); - printf(" # as retbuf: %u\n", access.CountPassedAsRetbuf); - printf(" # as returned value: %u\n\n", access.CountReturns); + printf(" #: (%u, " FMT_WT ")\n", access.Count, access.CountWtd); + printf(" # assigned from: (%u, " FMT_WT ")\n", access.CountAssignmentSource, access.CountAssignmentSourceWtd); + printf(" # assigned to: (%u, " FMT_WT ")\n", access.CountAssignmentDestination, access.CountAssignmentDestinationWtd); + printf(" # as call arg: (%u, " FMT_WT ")\n", access.CountCallArgs, access.CountCallArgsWtd); + printf(" # as implicit by-ref call arg: (%u, " FMT_WT ")\n", access.CountCallArgsByImplicitRef, access.CountCallArgsByImplicitRefWtd); + printf(" # as on-stack call arg: (%u, " FMT_WT ")\n", access.CountCallArgsOnStack, access.CountCallArgsOnStackWtd); + printf(" # as retbuf: (%u, " FMT_WT ")\n", access.CountPassedAsRetbuf, access.CountPassedAsRetbufWtd); + printf(" # as returned value: (%u, " FMT_WT ")\n\n", access.CountReturns, access.CountReturnsWtd); } } #endif @@ -576,7 +545,7 @@ class LocalsUseVisitor : public GenTreeVisitor { unsigned offs = lcl->GetLclOffs(); var_types accessType = lcl->TypeGet(); - ClassLayout* accessLayout = varTypeIsStruct(lcl) ? lcl->GetLayout(m_compiler) : nullptr; + ClassLayout* accessLayout = accessType == TYP_STRUCT ? lcl->GetLayout(m_compiler) : nullptr; AccessKindFlags accessFlags = ClassifyLocalAccess(lcl, user); m_uses[lcl->GetLclNum()].RecordAccess(offs, accessType, accessLayout, accessFlags, m_curBB->getBBWeight(m_compiler)); @@ -972,18 +941,6 @@ class ReplaceVisitor : public GenTreeVisitor PhaseStatus Promotion::Run() { -#ifdef DEBUG - if (m_compiler->verbose) - { - m_compiler->fgDispBasicBlocks(true); - } - -// if (!ISMETHOD("NPTest")) -//{ -// return PhaseStatus::MODIFIED_NOTHING; -//} -#endif - LocalsUseVisitor localsUse(this); for (BasicBlock* bb : m_compiler->Blocks()) { @@ -991,7 +948,6 @@ PhaseStatus Promotion::Run() for (Statement* stmt : bb->Statements()) { - DISPSTMT(stmt); localsUse.WalkTree(stmt->GetRootNodePointer(), nullptr); } } From 0c7bbf3a5b83b75329fb27ad1b80581c6487f16e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 13:59:59 +0100 Subject: [PATCH 17/52] Early out when there are no locals Otherwise we hit asserts when trying to allocate 0 bytes of memory. --- src/coreclr/jit/promotion.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index ec3bd2656b692..89f91436b8569 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -941,6 +941,11 @@ class ReplaceVisitor : public GenTreeVisitor PhaseStatus Promotion::Run() { + if (m_compiler->lvaCount <= 0) + { + return PhaseStatus::MODIFIED_NOTHING; + } + LocalsUseVisitor localsUse(this); for (BasicBlock* bb : m_compiler->Blocks()) { From 0b140bfe308ee05ad9f896d72c4a7fd117403852 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 14:03:19 +0100 Subject: [PATCH 18/52] Run jit-format --- src/coreclr/jit/promotion.cpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 89f91436b8569..e96ee3499f47a 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -42,10 +42,10 @@ struct Access unsigned CountReturns = 0; unsigned CountPassedAsRetbuf = 0; - weight_t CountWtd = 0; - weight_t CountAssignmentSourceWtd = 0; - weight_t CountAssignmentDestinationWtd = 0; - weight_t CountAssignmentsToRegisterCandidateWtd = 0; + weight_t CountWtd = 0; + weight_t CountAssignmentSourceWtd = 0; + weight_t CountAssignmentDestinationWtd = 0; + weight_t CountAssignmentsToRegisterCandidateWtd = 0; weight_t CountAssignmentsFromRegisterCandidateWtd = 0; weight_t CountCallArgsWtd = 0; weight_t CountCallArgsByImplicitRefWtd = 0; @@ -209,7 +209,7 @@ class LocalsUses Access& candidateAccess = m_accesses[index]; if ((candidateAccess.AccessType == accessType) && (candidateAccess.Layout == accessLayout)) { - access = &candidateAccess; + access = &candidateAccess; break; } @@ -359,7 +359,7 @@ class LocalsUses char* bufp = new (comp, CMK_DebugOnly) char[len]; strcpy_s(bufp, len, buf); #endif - unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); + unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); comp->lvaGetDesc(newLcl)->lvType = access.AccessType; replacements.push_back(Replacement(access.Offset, access.AccessType, newLcl)); } @@ -481,13 +481,20 @@ class LocalsUses } printf(" #: (%u, " FMT_WT ")\n", access.Count, access.CountWtd); - printf(" # assigned from: (%u, " FMT_WT ")\n", access.CountAssignmentSource, access.CountAssignmentSourceWtd); - printf(" # assigned to: (%u, " FMT_WT ")\n", access.CountAssignmentDestination, access.CountAssignmentDestinationWtd); - printf(" # as call arg: (%u, " FMT_WT ")\n", access.CountCallArgs, access.CountCallArgsWtd); - printf(" # as implicit by-ref call arg: (%u, " FMT_WT ")\n", access.CountCallArgsByImplicitRef, access.CountCallArgsByImplicitRefWtd); - printf(" # as on-stack call arg: (%u, " FMT_WT ")\n", access.CountCallArgsOnStack, access.CountCallArgsOnStackWtd); - printf(" # as retbuf: (%u, " FMT_WT ")\n", access.CountPassedAsRetbuf, access.CountPassedAsRetbufWtd); - printf(" # as returned value: (%u, " FMT_WT ")\n\n", access.CountReturns, access.CountReturnsWtd); + printf(" # assigned from: (%u, " FMT_WT ")\n", access.CountAssignmentSource, + access.CountAssignmentSourceWtd); + printf(" # assigned to: (%u, " FMT_WT ")\n", access.CountAssignmentDestination, + access.CountAssignmentDestinationWtd); + printf(" # as call arg: (%u, " FMT_WT ")\n", access.CountCallArgs, + access.CountCallArgsWtd); + printf(" # as implicit by-ref call arg: (%u, " FMT_WT ")\n", access.CountCallArgsByImplicitRef, + access.CountCallArgsByImplicitRefWtd); + printf(" # as on-stack call arg: (%u, " FMT_WT ")\n", access.CountCallArgsOnStack, + access.CountCallArgsOnStackWtd); + printf(" # as retbuf: (%u, " FMT_WT ")\n", access.CountPassedAsRetbuf, + access.CountPassedAsRetbufWtd); + printf(" # as returned value: (%u, " FMT_WT ")\n\n", access.CountReturns, + access.CountReturnsWtd); } } #endif From c4a04d7b757a243b50301938a0797a83b7eda5e0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 14:08:54 +0100 Subject: [PATCH 19/52] Remove dead code --- src/coreclr/jit/promotion.cpp | 78 ----------------------------------- src/coreclr/jit/promotion.h | 2 - 2 files changed, 80 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index e96ee3499f47a..91ce9d16897cb 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1073,81 +1073,3 @@ PhaseStatus Promotion::Run() return PhaseStatus::MODIFIED_EVERYTHING; } - -bool Promotion::ParseLocation( - GenTree* tree, unsigned* lcl, unsigned* offs, var_types* accessType, ClassLayout** accessLayout) -{ - *offs = 0; - *accessType = TYP_UNDEF; - *accessLayout = nullptr; - - while (true) - { - if (tree->OperIsLocalRead()) - { - *lcl = tree->AsLclVarCommon()->GetLclNum(); - *offs += tree->AsLclVarCommon()->GetLclOffs(); - if (*accessType == TYP_UNDEF) - { - *accessType = tree->TypeGet(); - *accessLayout = varTypeIsStruct(tree) ? tree->AsLclVarCommon()->GetLayout(m_compiler) : nullptr; - } - - return true; - } - - if (tree->OperIs(GT_FIELD) || tree->OperIsIndir()) - { - if (tree->OperIs(GT_FIELD)) - { - if (tree->AsField()->GetFldObj() == nullptr) - { - return false; - } - - if (*accessType == TYP_UNDEF) - { - CORINFO_CLASS_HANDLE fieldClassHandle; - var_types fieldType = m_compiler->eeGetFieldType(tree->AsField()->gtFldHnd, &fieldClassHandle); - - *accessType = fieldType == TYP_STRUCT ? m_compiler->impNormStructType(fieldClassHandle) : fieldType; - *accessLayout = - varTypeIsStruct(fieldType) ? m_compiler->typGetObjLayout(fieldClassHandle) : nullptr; - } - - *offs += tree->AsField()->gtFldOffset; - tree = tree->AsField()->GetFldObj(); - } - else - { - if (*accessType == TYP_UNDEF) - { - *accessType = tree->TypeGet(); - *accessLayout = tree->OperIsBlk() ? tree->AsBlk()->GetLayout() : nullptr; - } - - tree = tree->AsIndir()->Addr(); - } - - switch (tree->gtOper) - { - case GT_ADDR: - tree = tree->gtGetOp1(); - continue; - case GT_LCL_VAR_ADDR: - *lcl = tree->AsLclVarCommon()->GetLclNum(); - return true; - case GT_LCL_FLD_ADDR: - *lcl = tree->AsLclVarCommon()->GetLclNum(); - *offs += tree->AsLclVarCommon()->GetLclOffs(); - return true; - default: - return false; - } - } - else - { - return false; - } - } -} diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index 512abb2b1429a..d19b5ea54e359 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -13,8 +13,6 @@ class Promotion friend class LocalsUseVisitor; friend class ReplaceVisitor; - bool ParseLocation(GenTree* tree, unsigned* lcl, unsigned* offs, var_types* accessType, ClassLayout** accessLayout); - public: explicit Promotion(Compiler* compiler) : m_compiler(compiler) { From c9974d5a96b5a2953e708c645fc584110f1192bc Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 15:49:46 +0100 Subject: [PATCH 20/52] Some more cleanup --- src/coreclr/jit/jitconfigvalues.h | 1 - src/coreclr/jit/promotion.cpp | 46 +------------------------------ 2 files changed, 1 insertion(+), 46 deletions(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 871f83b9ddc36..52aab9d99f4e1 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -131,7 +131,6 @@ CONFIG_INTEGER(JitNoMemoryBarriers, W("JitNoMemoryBarriers"), 0) // If 1, don't CONFIG_INTEGER(JitNoRegLoc, W("JitNoRegLoc"), 0) CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables struct promotion 1 - for all, 2 - for // params. -CONFIG_INTEGER(JitNewStructPromotion, W("JitNewStructPromotion"), 1) // Use new struct promotion CONFIG_INTEGER(JitNoUnroll, W("JitNoUnroll"), 0) CONFIG_INTEGER(JitOrder, W("JitOrder"), 0) CONFIG_INTEGER(JitQueryCurrentStaticFieldClass, W("JitQueryCurrentStaticFieldClass"), 1) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 91ce9d16897cb..b0611694466a0 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -10,7 +10,7 @@ PhaseStatus Compiler::PromoteStructsNew() } #ifdef DEBUG - if (JitConfig.JitNewStructPromotion() == 0) + if (fgNoStructPromotion) { return PhaseStatus::MODIFIED_NOTHING; } @@ -292,50 +292,6 @@ class LocalsUses return; } - // jitstd::sort( - // m_accesses.begin(), m_accesses.end(), - // [](const Access& l, const Access& r) - // { - // if (l.Offset < r.Offset) - // { - // return true; - // } - - // if (l.Offset > r.Offset) - // { - // return false; - // } - - // if (l.Count > r.Count) - // { - // return true; - // } - - // if (l.Count < r.Count) - // { - // return false; - // } - - // // TODO: Better heuristics, use info about struct uses to decrease benefit of promotions - // if (r.AccessType == TYP_STRUCT) - // { - // if (l.AccessType == TYP_STRUCT) - // { - // return l.Layout < r.Layout; - // } - - // return true; - // } - - // if (l.AccessType == TYP_STRUCT) - // { - // return false; - // } - - // assert(l.AccessType != r.AccessType); - // return l.AccessType < r.AccessType; - // }); - assert(replacements.size() == 0); for (size_t i = 0; i < m_accesses.size(); i++) { From fc075a3b1f5a5dbfa49de4814def332ce33e685b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 22 Mar 2023 17:22:14 +0100 Subject: [PATCH 21/52] Add more fine-grained stress modes --- src/coreclr/jit/compiler.h | 3 +++ src/coreclr/jit/jitconfigvalues.h | 1 + src/coreclr/jit/morph.cpp | 8 ++++++++ src/coreclr/jit/promotion.cpp | 23 ++++++++++++++++++++++- 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 543d435f7976b..3f01587342759 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9830,6 +9830,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX STRESS_MODE(SSA_INFO) /* Select lower thresholds for "complex" SSA num encoding */ \ STRESS_MODE(SPLIT_TREES_RANDOMLY) /* Split all statements at a random tree */ \ STRESS_MODE(SPLIT_TREES_REMOVE_COMMAS) /* Remove all GT_COMMA nodes */ \ + STRESS_MODE(NO_OLD_PROMOTION) /* Do not use old promotion */ \ + STRESS_MODE(GENERALIZED_PROMOTION) /* Use generalized promotion */ \ + STRESS_MODE(GENERALIZED_PROMOTION_COST) \ \ /* After COUNT_VARN, stress level 2 does all of these all the time */ \ \ diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 52aab9d99f4e1..92bc6f4919b94 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -432,6 +432,7 @@ CONFIG_STRING(JitEnableVNBasedDeadStoreRemovalRange, W("JitEnableVNBasedDeadStor CONFIG_STRING(JitEnableEarlyLivenessRange, W("JitEnableEarlyLivenessRange")) CONFIG_STRING(JitOnlyOptimizeRange, W("JitOnlyOptimizeRange")) // If set, all methods that do _not_ match are forced into MinOpts +CONFIG_STRING(JitEnableGeneralizedPromotionRange, W("JitEnableGeneralizedPromotionRange")) CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b4c81573e0446..33d5e598e2cee 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14731,6 +14731,14 @@ PhaseStatus Compiler::fgPromoteStructs() return PhaseStatus::MODIFIED_NOTHING; } +#ifdef DEBUG + if (compStressCompile(STRESS_NO_OLD_PROMOTION, 20)) + { + JITDUMP(" skipping due to stress\n"); + return PhaseStatus::MODIFIED_NOTHING; + } +#endif + #if 0 // The code in this #if has been useful in debugging struct promotion issues, by // enabling selective enablement of the struct promotion optimization according to diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index b0611694466a0..68784001876df 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -9,11 +9,24 @@ PhaseStatus Compiler::PromoteStructsNew() return PhaseStatus::MODIFIED_NOTHING; } -#ifdef DEBUG if (fgNoStructPromotion) { return PhaseStatus::MODIFIED_NOTHING; } + + //if (!compStressCompile(STRESS_GENERALIZED_PROMOTION)) + //{ + // return PhaseStatus::MODIFIED_NOTHING; + //} + +#ifdef DEBUG + static ConfigMethodRange s_range; + s_range.EnsureInit(JitConfig.JitEnableGeneralizedPromotionRange()); + + if (!s_range.Contains(info.compMethodHash())) + { + return PhaseStatus::MODIFIED_NOTHING; + } #endif Promotion prom(this); @@ -412,6 +425,14 @@ class LocalsUses return true; } +#ifdef DEBUG + if (comp->compStressCompile(Compiler::STRESS_GENERALIZED_PROMOTION_COST, 25)) + { + JITDUMP(" Promoting replacement due to stress\n"); + return true; + } +#endif + JITDUMP(" Disqualifying replacement\n"); return false; } From 0c55c69e088c7c80fa1513defb1a3579988a0962 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 23 Mar 2023 15:14:57 +0100 Subject: [PATCH 22/52] Add some stress modes --- src/coreclr/jit/promotion.cpp | 58 ++++++++++++++++++++++------------- src/coreclr/jit/promotion.h | 2 ++ 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 68784001876df..9d43b68683e0b 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -392,8 +392,8 @@ class LocalsUses weight_t countReadBacksWtd = 0; LclVarDsc* lcl = comp->lvaGetDesc(lclNum); - // For parameters we need an initial read back - if (lcl->lvIsParam) + // For parameters or OSR locals we need an initial read back + if (lcl->lvIsParam || lcl->lvIsOSRLocal) { countReadBacksWtd += comp->fgFirstBB->getBBWeight(comp); } @@ -1022,31 +1022,47 @@ PhaseStatus Promotion::Run() } } - Statement* prevParamLoadStmt = nullptr; - for (unsigned i = 0; i < m_compiler->info.compArgsCount; i++) + Statement* prevInitialReadBackStmt = nullptr; + for (unsigned lclNum = 0; lclNum < m_compiler->info.compArgsCount; lclNum++) { - for (unsigned j = 0; j < replacements[i].size(); j++) - { - Replacement& rep = replacements[i][j]; - - m_compiler->fgEnsureFirstBBisScratch(); + InsertInitialReadBack(lclNum, replacements[lclNum], &prevInitialReadBackStmt); + } - GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); - GenTree* src = m_compiler->gtNewLclFldNode(i, rep.AccessType, rep.Offset); - GenTree* asg = m_compiler->gtNewAssignNode(dst, src); - Statement* stmt = m_compiler->fgNewStmtFromTree(asg); - if (prevParamLoadStmt != nullptr) - { - m_compiler->fgInsertStmtAfter(m_compiler->fgFirstBB, prevParamLoadStmt, stmt); - } - else + if (m_compiler->opts.IsOSR()) + { + for (unsigned lclNum = m_compiler->info.compArgsCount; lclNum < m_compiler->lvaCount; lclNum++) + { + if (m_compiler->lvaGetDesc(lclNum)->lvIsOSRLocal) { - m_compiler->fgInsertStmtAtBeg(m_compiler->fgFirstBB, stmt); + InsertInitialReadBack(lclNum, replacements[lclNum], &prevInitialReadBackStmt); } - - prevParamLoadStmt = stmt; } } return PhaseStatus::MODIFIED_EVERYTHING; } + +void Promotion::InsertInitialReadBack(unsigned lclNum, const jitstd::vector& replacements, Statement** prevStmt) +{ + for (unsigned i = 0; i < replacements.size(); i++) + { + const Replacement& rep = replacements[i]; + + m_compiler->fgEnsureFirstBBisScratch(); + + GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); + GenTree* asg = m_compiler->gtNewAssignNode(dst, src); + Statement* stmt = m_compiler->fgNewStmtFromTree(asg); + if (*prevStmt != nullptr) + { + m_compiler->fgInsertStmtAfter(m_compiler->fgFirstBB, *prevStmt, stmt); + } + else + { + m_compiler->fgInsertStmtAtBeg(m_compiler->fgFirstBB, stmt); + } + + *prevStmt = stmt; + } +} diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index d19b5ea54e359..c5909d77fec6e 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -5,6 +5,7 @@ #define _PROMOTION_H class Compiler; +struct Replacement; class Promotion { @@ -13,6 +14,7 @@ class Promotion friend class LocalsUseVisitor; friend class ReplaceVisitor; + void InsertInitialReadBack(unsigned lclNum, const jitstd::vector& replacements, Statement** prevStmt); public: explicit Promotion(Compiler* compiler) : m_compiler(compiler) { From 91db180d078c3346c76d79f2d24964cf3615fb41 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 23 Mar 2023 15:29:44 +0100 Subject: [PATCH 23/52] Uncomment stress check --- src/coreclr/jit/promotion.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 9d43b68683e0b..e1604878ea3c7 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -14,10 +14,10 @@ PhaseStatus Compiler::PromoteStructsNew() return PhaseStatus::MODIFIED_NOTHING; } - //if (!compStressCompile(STRESS_GENERALIZED_PROMOTION)) - //{ - // return PhaseStatus::MODIFIED_NOTHING; - //} + if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) + { + return PhaseStatus::MODIFIED_NOTHING; + } #ifdef DEBUG static ConfigMethodRange s_range; From 6ec92c8dab6bedc91a0eab75f6474f9f0c17135f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Mar 2023 11:36:01 +0200 Subject: [PATCH 24/52] Propagate lvSuppressedZeroInit --- src/coreclr/jit/promotion.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index e1604878ea3c7..bc6be5afad995 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -305,6 +305,12 @@ class LocalsUses return; } + // We may be relying on zero-init as part of the prolog for the struct + // local. We need to propagate this to all replacement locals too to + // allow downstream phases to properly know that the local may need to + // be zeroed. + + bool suppressedInit = comp->lvaGetDesc(lclNum)->lvSuppressedZeroInit; assert(replacements.size() == 0); for (size_t i = 0; i < m_accesses.size(); i++) { @@ -329,7 +335,9 @@ class LocalsUses strcpy_s(bufp, len, buf); #endif unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); - comp->lvaGetDesc(newLcl)->lvType = access.AccessType; + LclVarDsc* dsc = comp->lvaGetDesc(newLcl); + dsc->lvType = access.AccessType; + dsc->lvSuppressedZeroInit = suppressedInit; replacements.push_back(Replacement(access.Offset, access.AccessType, newLcl)); } } From c8e4dcbed3e708bb79a65ef8277275e7e635fb61 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Mar 2023 12:01:59 +0200 Subject: [PATCH 25/52] Comment stress mode only enabling --- src/coreclr/jit/promotion.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index bc6be5afad995..bc3528ac47145 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -14,10 +14,10 @@ PhaseStatus Compiler::PromoteStructsNew() return PhaseStatus::MODIFIED_NOTHING; } - if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) - { - return PhaseStatus::MODIFIED_NOTHING; - } + //if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) + //{ + // return PhaseStatus::MODIFIED_NOTHING; + //} #ifdef DEBUG static ConfigMethodRange s_range; From 31f87697a66dd45519a2c3f4fb62ef61354e0ae3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Mar 2023 12:02:24 +0200 Subject: [PATCH 26/52] Run jit-format --- src/coreclr/jit/jitconfigvalues.h | 4 ++-- src/coreclr/jit/promotion.cpp | 24 +++++++++++++----------- src/coreclr/jit/promotion.h | 1 + 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 5352eff477385..ad75a8a274411 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -129,8 +129,8 @@ CONFIG_INTEGER(JitNoHoist, W("JitNoHoist"), 0) CONFIG_INTEGER(JitNoInline, W("JitNoInline"), 0) // Disables inlining of all methods CONFIG_INTEGER(JitNoMemoryBarriers, W("JitNoMemoryBarriers"), 0) // If 1, don't generate memory barriers CONFIG_INTEGER(JitNoRegLoc, W("JitNoRegLoc"), 0) -CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables struct promotion 1 - for all, 2 - for - // params. +CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables struct promotion 1 - for all, 2 - for + // params. CONFIG_INTEGER(JitNoUnroll, W("JitNoUnroll"), 0) CONFIG_INTEGER(JitOrder, W("JitOrder"), 0) CONFIG_INTEGER(JitQueryCurrentStaticFieldClass, W("JitQueryCurrentStaticFieldClass"), 1) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index bc3528ac47145..e9612394bf372 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -14,10 +14,10 @@ PhaseStatus Compiler::PromoteStructsNew() return PhaseStatus::MODIFIED_NOTHING; } - //if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) - //{ - // return PhaseStatus::MODIFIED_NOTHING; - //} +// if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) +//{ +// return PhaseStatus::MODIFIED_NOTHING; +//} #ifdef DEBUG static ConfigMethodRange s_range; @@ -334,9 +334,9 @@ class LocalsUses char* bufp = new (comp, CMK_DebugOnly) char[len]; strcpy_s(bufp, len, buf); #endif - unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); - LclVarDsc* dsc = comp->lvaGetDesc(newLcl); - dsc->lvType = access.AccessType; + unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); + LclVarDsc* dsc = comp->lvaGetDesc(newLcl); + dsc->lvType = access.AccessType; dsc->lvSuppressedZeroInit = suppressedInit; replacements.push_back(Replacement(access.Offset, access.AccessType, newLcl)); } @@ -1050,7 +1050,9 @@ PhaseStatus Promotion::Run() return PhaseStatus::MODIFIED_EVERYTHING; } -void Promotion::InsertInitialReadBack(unsigned lclNum, const jitstd::vector& replacements, Statement** prevStmt) +void Promotion::InsertInitialReadBack(unsigned lclNum, + const jitstd::vector& replacements, + Statement** prevStmt) { for (unsigned i = 0; i < replacements.size(); i++) { @@ -1058,9 +1060,9 @@ void Promotion::InsertInitialReadBack(unsigned lclNum, const jitstd::vectorfgEnsureFirstBBisScratch(); - GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); - GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); - GenTree* asg = m_compiler->gtNewAssignNode(dst, src); + GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); + GenTree* asg = m_compiler->gtNewAssignNode(dst, src); Statement* stmt = m_compiler->fgNewStmtFromTree(asg); if (*prevStmt != nullptr) { diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index c5909d77fec6e..19ae9a78c8aa1 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -15,6 +15,7 @@ class Promotion friend class ReplaceVisitor; void InsertInitialReadBack(unsigned lclNum, const jitstd::vector& replacements, Statement** prevStmt); + public: explicit Promotion(Compiler* compiler) : m_compiler(compiler) { From 2075c618aa8db1d1c2dfc153a21f95ef4cb92ea7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Mar 2023 16:02:52 +0200 Subject: [PATCH 27/52] Explicitly zero-init promoted locals with suppressed init The importer may suppress inserting explicit zero initing of some struct locals under the assumption that they will be zeroed by the prolog. However, after promotion of some fields that may no longer be the case for those fields. --- src/coreclr/jit/promotion.cpp | 102 +++++++++++++++++++++------------- src/coreclr/jit/promotion.h | 4 ++ 2 files changed, 66 insertions(+), 40 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index e9612394bf372..0c1422dc48e1b 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -128,12 +128,11 @@ static size_t BinarySearch(const jitstd::vector& vec, unsigned offset) struct Replacement { - unsigned Offset; - var_types AccessType; - unsigned LclNum; - bool NeedsWriteBack = true; - bool NeedsReadBack = false; - Replacement* Next = nullptr; + unsigned Offset; + var_types AccessType; + unsigned LclNum; + bool NeedsWriteBack = true; + bool NeedsReadBack = false; Replacement(unsigned offset, var_types accessType, unsigned lclNum) : Offset(offset), AccessType(accessType), LclNum(lclNum) @@ -305,12 +304,6 @@ class LocalsUses return; } - // We may be relying on zero-init as part of the prolog for the struct - // local. We need to propagate this to all replacement locals too to - // allow downstream phases to properly know that the local may need to - // be zeroed. - - bool suppressedInit = comp->lvaGetDesc(lclNum)->lvSuppressedZeroInit; assert(replacements.size() == 0); for (size_t i = 0; i < m_accesses.size(); i++) { @@ -334,10 +327,10 @@ class LocalsUses char* bufp = new (comp, CMK_DebugOnly) char[len]; strcpy_s(bufp, len, buf); #endif - unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); - LclVarDsc* dsc = comp->lvaGetDesc(newLcl); - dsc->lvType = access.AccessType; - dsc->lvSuppressedZeroInit = suppressedInit; + unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); + LclVarDsc* dsc = comp->lvaGetDesc(newLcl); + dsc->lvType = access.AccessType; + replacements.push_back(Replacement(access.Offset, access.AccessType, newLcl)); } } @@ -1030,20 +1023,22 @@ PhaseStatus Promotion::Run() } } - Statement* prevInitialReadBackStmt = nullptr; - for (unsigned lclNum = 0; lclNum < m_compiler->info.compArgsCount; lclNum++) + Statement* prevStmt = nullptr; + for (unsigned lclNum = 0; lclNum < numLocals; lclNum++) { - InsertInitialReadBack(lclNum, replacements[lclNum], &prevInitialReadBackStmt); - } - - if (m_compiler->opts.IsOSR()) - { - for (unsigned lclNum = m_compiler->info.compArgsCount; lclNum < m_compiler->lvaCount; lclNum++) + LclVarDsc* dsc = m_compiler->lvaGetDesc(lclNum); + if (dsc->lvIsParam || dsc->lvIsOSRLocal) { - if (m_compiler->lvaGetDesc(lclNum)->lvIsOSRLocal) - { - InsertInitialReadBack(lclNum, replacements[lclNum], &prevInitialReadBackStmt); - } + InsertInitialReadBack(lclNum, replacements[lclNum], &prevStmt); + } + else if (dsc->lvSuppressedZeroInit) + { + // We may have suppressed inserting an explicit zero init based on the + // assumption that the entire local will be zero inited in the prolog. + // Now that we are promoting some fields that assumption may be + // invalidated for those fields, and we may need to insert explicit + // zero inits again. + ExplicitlyZeroInitReplacementLocals(lclNum, replacements[lclNum], &prevStmt); } } @@ -1058,21 +1053,48 @@ void Promotion::InsertInitialReadBack(unsigned lclNum, { const Replacement& rep = replacements[i]; - m_compiler->fgEnsureFirstBBisScratch(); + GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); + GenTree* asg = m_compiler->gtNewAssignNode(dst, src); + InsertInitStatement(prevStmt, asg); + } +} - GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); - GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); - GenTree* asg = m_compiler->gtNewAssignNode(dst, src); - Statement* stmt = m_compiler->fgNewStmtFromTree(asg); - if (*prevStmt != nullptr) - { - m_compiler->fgInsertStmtAfter(m_compiler->fgFirstBB, *prevStmt, stmt); - } - else +void Promotion::ExplicitlyZeroInitReplacementLocals(unsigned lclNum, + const jitstd::vector& replacements, + Statement** prevStmt) +{ + for (unsigned i = 0; i < replacements.size(); i++) + { + const Replacement& rep = replacements[i]; + + if (!m_compiler->fgVarNeedsExplicitZeroInit(rep.LclNum, false, false)) { - m_compiler->fgInsertStmtAtBeg(m_compiler->fgFirstBB, stmt); + // Other downstream code (e.g. recursive tailcalls-to-loops) may + // still need to insert further explicit zero initing. + m_compiler->lvaGetDesc(rep.LclNum)->lvSuppressedZeroInit = true; + continue; } - *prevStmt = stmt; + GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTree* src = m_compiler->gtNewZeroConNode(rep.AccessType); + GenTree* asg = m_compiler->gtNewAssignNode(dst, src); + InsertInitStatement(prevStmt, asg); + } +} + +void Promotion::InsertInitStatement(Statement** prevStmt, GenTree* tree) +{ + m_compiler->fgEnsureFirstBBisScratch(); + Statement* stmt = m_compiler->fgNewStmtFromTree(tree); + if (*prevStmt != nullptr) + { + m_compiler->fgInsertStmtAfter(m_compiler->fgFirstBB, *prevStmt, stmt); } + else + { + m_compiler->fgInsertStmtAtBeg(m_compiler->fgFirstBB, stmt); + } + + *prevStmt = stmt; } diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index 19ae9a78c8aa1..d898c2de26a72 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -15,6 +15,10 @@ class Promotion friend class ReplaceVisitor; void InsertInitialReadBack(unsigned lclNum, const jitstd::vector& replacements, Statement** prevStmt); + void ExplicitlyZeroInitReplacementLocals(unsigned lclNum, + const jitstd::vector& replacements, + Statement** prevStmt); + void InsertInitStatement(Statement** prevStmt, GenTree* tree); public: explicit Promotion(Compiler* compiler) : m_compiler(compiler) From d2d4401ca4e4960882b5f80c6cb8cfc8e3f10902 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Mar 2023 17:10:37 +0200 Subject: [PATCH 28/52] Fix JITDUMP wrong format specifier --- src/coreclr/jit/promotion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 0c1422dc48e1b..575e08f3c22db 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -414,7 +414,7 @@ class LocalsUses costWith += countWriteBacksWtd * writeBackCost; JITDUMP("Evaluating access %s @ %03u\n", varTypeName(access.AccessType), access.Offset); - JITDUMP(" Write-back cost: %d\n", writeBackCost); + JITDUMP(" Write-back cost: " FMT_WT "\n", writeBackCost); JITDUMP(" # write backs: " FMT_WT "\n", countWriteBacksWtd); JITDUMP(" # read backs: " FMT_WT "\n", countReadBacksWtd); JITDUMP(" Cost with: " FMT_WT "\n", costWith); From 33089984ff9126e9f824e405d71fa8ae275003ed Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Mar 2023 17:10:53 +0200 Subject: [PATCH 29/52] Disallow tail merging from adding predecessors to the scratch BB --- src/coreclr/jit/fgopt.cpp | 72 +++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 609d689edf1c1..8d46467b837d2 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -6757,45 +6757,57 @@ PhaseStatus Compiler::fgTailMerge() // for (BasicBlock* const predBlock : block->PredBlocks()) { - if ((predBlock->GetUniqueSucc() == block) && BasicBlock::sameEHRegion(block, predBlock)) + if (predBlock->GetUniqueSucc() != block) { - Statement* lastStmt = predBlock->lastStmt(); + continue; + } - // Block might be empty. - // - if (lastStmt == nullptr) - { - continue; - } + if (!BasicBlock::sameEHRegion(block, predBlock)) + { + continue; + } - // Walk back past any GT_NOPs. - // - Statement* const firstStmt = predBlock->firstStmt(); - while (lastStmt->GetRootNode()->OperIs(GT_NOP)) - { - if (lastStmt == firstStmt) - { - // predBlock is evidently all GT_NOP. - // - lastStmt = nullptr; - break; - } + if (fgBBisScratch(block)) + { + continue; + } - lastStmt = lastStmt->GetPrevStmt(); - } + Statement* lastStmt = predBlock->lastStmt(); - // Block might be effectively empty. - // - if (lastStmt == nullptr) + // Block might be empty. + // + if (lastStmt == nullptr) + { + continue; + } + + // Walk back past any GT_NOPs. + // + Statement* const firstStmt = predBlock->firstStmt(); + while (lastStmt->GetRootNode()->OperIs(GT_NOP)) + { + if (lastStmt == firstStmt) { - continue; + // predBlock is evidently all GT_NOP. + // + lastStmt = nullptr; + break; } - // We don't expect to see PHIs but watch for them anyways. - // - assert(!lastStmt->IsPhiDefnStmt()); - predInfo.Emplace(predBlock, lastStmt); + lastStmt = lastStmt->GetPrevStmt(); } + + // Block might be effectively empty. + // + if (lastStmt == nullptr) + { + continue; + } + + // We don't expect to see PHIs but watch for them anyways. + // + assert(!lastStmt->IsPhiDefnStmt()); + predInfo.Emplace(predBlock, lastStmt); } // Are there enough preds to make it interesting? From 18621492b7e159acef6df4f8b715ed74092b67b3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Mar 2023 21:19:10 +0200 Subject: [PATCH 30/52] Avoid picking scratch block as victim instead Less conservative (and also the previous check was using the wrong block) --- src/coreclr/jit/fgopt.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 8d46467b837d2..5609734f2f486 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -6767,11 +6767,6 @@ PhaseStatus Compiler::fgTailMerge() continue; } - if (fgBBisScratch(block)) - { - continue; - } - Statement* lastStmt = predBlock->lastStmt(); // Block might be empty. @@ -6897,8 +6892,8 @@ PhaseStatus Compiler::fgTailMerge() JITDUMP("A set of %d preds of " FMT_BB " end with the same tree\n", matchedPredInfo.Height(), block->bbNum); JITDUMPEXEC(gtDispStmt(matchedPredInfo.TopRef(0).m_stmt)); - BasicBlock* crossJumpVictim = matchedPredInfo.TopRef(0).m_block; - Statement* crossJumpStmt = matchedPredInfo.TopRef(0).m_stmt; + BasicBlock* crossJumpVictim = nullptr; + Statement* crossJumpStmt = nullptr; bool haveNoSplitVictim = false; bool haveFallThroughVictim = false; @@ -6908,6 +6903,13 @@ PhaseStatus Compiler::fgTailMerge() Statement* const stmt = info.m_stmt; BasicBlock* const predBlock = info.m_block; + // Never pick the scratch block as the victim as that would + // cause us to add a predecessor to it, which is invalid. + if (fgBBisScratch(predBlock)) + { + continue; + } + bool const isNoSplit = stmt == predBlock->firstStmt(); bool const isFallThrough = (predBlock->bbJumpKind == BBJ_NONE); From baaff752458b77770f1608b838efc2b7aef89ec5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 28 Mar 2023 23:00:57 +0200 Subject: [PATCH 31/52] Oops --- src/coreclr/jit/fgopt.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 5609734f2f486..76126163154f3 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -6917,7 +6917,12 @@ PhaseStatus Compiler::fgTailMerge() // bool useBlock = false; - if (isNoSplit && isFallThrough) + if (crossJumpVictim == nullptr) + { + // Pick an initial candidate. + useBlock = true; + } + else if (isNoSplit && isFallThrough) { // This is the ideal choice. // From 24773f1209d7f4d438a7d2a583da32f6916df81e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 29 Mar 2023 11:24:43 +0200 Subject: [PATCH 32/52] Fix gtSplitTree for some void-typed commas --- src/coreclr/jit/gentree.cpp | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 6828eb96d3532..6465d17f38e4c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16382,6 +16382,34 @@ bool Compiler::gtSplitTree( return false; } + bool IsValue(const UseInfo& useInf) + { + GenTree* node = (*useInf.Use)->gtEffectiveVal(); + if (!node->IsValue()) + { + return false; + } + + if (node->OperIs(GT_ASG)) + { + return false; + } + + GenTree* user = useInf.User; + + if (user == nullptr) + { + return false; + } + + if (user->OperIs(GT_COMMA) && (&user->AsOp()->gtOp1 == useInf.Use)) + { + return false; + } + + return true; + } + void SplitOutUse(const UseInfo& useInf, bool userIsReturned) { GenTree** use = useInf.Use; @@ -16447,8 +16475,7 @@ bool Compiler::gtSplitTree( } Statement* stmt = nullptr; - if (!(*use)->IsValue() || (*use)->gtEffectiveVal()->OperIs(GT_ASG) || (user == nullptr) || - (user->OperIs(GT_COMMA) && (user->gtGetOp1() == *use))) + if (!IsValue(useInf)) { GenTree* sideEffects = nullptr; m_compiler->gtExtractSideEffList(*use, &sideEffects); From 43d031515ca3ccd324aaf5b97af26bc5525180c3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 29 Mar 2023 15:10:20 +0200 Subject: [PATCH 33/52] Add to jit-experimental --- eng/pipelines/common/templates/runtimes/run-test-job.yml | 2 ++ src/tests/Common/testenvironment.proj | 3 +++ 2 files changed, 5 insertions(+) diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index 63366eebd928c..d6c49f05be77b 100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -583,6 +583,8 @@ jobs: - jitpartialcompilation - jitpartialcompilation_pgo - jitobjectstackallocation + - jitgeneralizedpromotion + - jitgeneralizedpromotion_full ${{ if in(parameters.testGroup, 'jit-cfg') }}: scenarios: diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index eb3656263eadc..7a777e133fb75 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -49,6 +49,7 @@ DOTNET_JitStress; DOTNET_JitStressProcedureSplitting; DOTNET_JitStressRegs; + DOTNET_JitStressModeNames; DOTNET_TailcallStress; DOTNET_ReadyToRun; DOTNET_ZapDisable; @@ -214,6 +215,8 @@ + + From d9421de5340a0c305e0deba51a3895710f8e6aad Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 29 Mar 2023 15:11:30 +0200 Subject: [PATCH 34/52] Clean up, add function headers, disable by default --- src/coreclr/jit/compiler.cpp | 4 +- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/compphases.h | 2 +- src/coreclr/jit/promotion.cpp | 419 ++++++++++++++++++++++------------ 4 files changed, 273 insertions(+), 154 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index ee5881c298332..4b5d043ddbc91 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4713,7 +4713,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_EARLY_LIVENESS, &Compiler::fgEarlyLiveness); - DoPhase(this, PHASE_NEW_PROMOTE_STRUCTS, &Compiler::PromoteStructsNew); + // Promote more struct locals + // + DoPhase(this, PHASE_GENERALIZED_PROMOTION, &Compiler::GeneralizedPromotion); // Run a simple forward substitution pass. // diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index fd54dc8a78e78..2fac604a25f51 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6068,7 +6068,7 @@ class Compiler PhaseStatus fgMarkAddressExposedLocals(); void fgSequenceLocals(Statement* stmt); - PhaseStatus PromoteStructsNew(); + PhaseStatus GeneralizedPromotion(); PhaseStatus fgForwardSub(); bool fgForwardSubBlock(BasicBlock* block); diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 2268ee0a2dd89..6936a0e03a0d0 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -43,7 +43,7 @@ CompPhaseNameMacro(PHASE_UPDATE_FINALLY_FLAGS, "Update finally target flag CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH, "Update flow graph early pass", false, -1, false) CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", false, -1, false) CompPhaseNameMacro(PHASE_EARLY_LIVENESS, "Early liveness", false, -1, false) -CompPhaseNameMacro(PHASE_NEW_PROMOTE_STRUCTS, "Generalized promotion", false, -1, false) +CompPhaseNameMacro(PHASE_GENERALIZED_PROMOTION, "Generalized promotion", false, -1, false) CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Substitution", false, -1, false) CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", false, -1, false) CompPhaseNameMacro(PHASE_PROMOTE_STRUCTS, "Morph - Promote Structs", false, -1, false) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 575e08f3c22db..741a1dab369b2 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -2,7 +2,13 @@ #include "promotion.h" #include "jitstd/algorithm.h" -PhaseStatus Compiler::PromoteStructsNew() +//------------------------------------------------------------------------ +// GeneralizedPromotion: Promote structs based on primitive access patterns. +// +// Returns: +// Suitable phase status. +// +PhaseStatus Compiler::GeneralizedPromotion() { if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE)) { @@ -14,10 +20,10 @@ PhaseStatus Compiler::PromoteStructsNew() return PhaseStatus::MODIFIED_NOTHING; } -// if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) -//{ -// return PhaseStatus::MODIFIED_NOTHING; -//} + if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) + { + return PhaseStatus::MODIFIED_NOTHING; + } #ifdef DEBUG static ConfigMethodRange s_range; @@ -33,6 +39,7 @@ PhaseStatus Compiler::PromoteStructsNew() return prom.Run(); } +// Represents an access into a struct local. struct Access { ClassLayout* Layout; @@ -45,26 +52,20 @@ struct Access unsigned CountAssignmentSource = 0; // Number of times this access is on the LHS of an assignment. unsigned CountAssignmentDestination = 0; - // Number of times this access is on the RHS of an assignment where the LHS is a probable register candidate. - unsigned CountAssignmentsToRegisterCandidate = 0; - // Number of times this access is on the LHS of an assignment where the RHS is a probable register candidate. - unsigned CountAssignmentsFromRegisterCandidate = 0; - unsigned CountCallArgs = 0; - unsigned CountCallArgsByImplicitRef = 0; - unsigned CountCallArgsOnStack = 0; - unsigned CountReturns = 0; - unsigned CountPassedAsRetbuf = 0; - - weight_t CountWtd = 0; - weight_t CountAssignmentSourceWtd = 0; - weight_t CountAssignmentDestinationWtd = 0; - weight_t CountAssignmentsToRegisterCandidateWtd = 0; - weight_t CountAssignmentsFromRegisterCandidateWtd = 0; - weight_t CountCallArgsWtd = 0; - weight_t CountCallArgsByImplicitRefWtd = 0; - weight_t CountCallArgsOnStackWtd = 0; - weight_t CountReturnsWtd = 0; - weight_t CountPassedAsRetbufWtd = 0; + unsigned CountCallArgs = 0; + unsigned CountCallArgsByImplicitRef = 0; + unsigned CountCallArgsOnStack = 0; + unsigned CountReturns = 0; + unsigned CountPassedAsRetbuf = 0; + + weight_t CountWtd = 0; + weight_t CountAssignmentSourceWtd = 0; + weight_t CountAssignmentDestinationWtd = 0; + weight_t CountCallArgsWtd = 0; + weight_t CountCallArgsByImplicitRefWtd = 0; + weight_t CountCallArgsOnStackWtd = 0; + weight_t CountReturnsWtd = 0; + weight_t CountPassedAsRetbufWtd = 0; Access(unsigned offset, var_types accessType, ClassLayout* layout) : Layout(layout), Offset(offset), AccessType(accessType) @@ -77,6 +78,7 @@ struct Access } bool Overlaps(unsigned otherStart, unsigned otherSize) const + { unsigned end = Offset + GetAccessSize(); if (end <= otherStart) @@ -94,8 +96,19 @@ struct Access } }; -// Find first entry with an equal offset, or bitwise complement of first -// entry with a higher offset. +//------------------------------------------------------------------------ +// BinarySearch: +// Find first entry with an equal offset, or bitwise complement of first +// entry with a higher offset. +// +// Parameters: +// vec - The vector to binary search in +// offset - The offset to search for +// +// Returns: +// Index of the first entry with an equal offset, or bitwise complement of +// first entry with a higher offset. +// template static size_t BinarySearch(const jitstd::vector& vec, unsigned offset) { @@ -126,13 +139,16 @@ static size_t BinarySearch(const jitstd::vector& vec, unsigned offset) return ~min; } +// Represents a single replacement of a (field) access into a struct local. struct Replacement { unsigned Offset; var_types AccessType; unsigned LclNum; - bool NeedsWriteBack = true; - bool NeedsReadBack = false; + // Is the replacement local (given by LclNum) fresher than the value in the struct local? + bool NeedsWriteBack = true; + // Is the value in the struct local fresher than the replacement local? + bool NeedsReadBack = false; Replacement(unsigned offset, var_types accessType, unsigned lclNum) : Offset(offset), AccessType(accessType), LclNum(lclNum) @@ -159,16 +175,14 @@ struct Replacement enum class AccessKindFlags : uint32_t { - None = 0, - IsCallArg = 1, - IsAssignmentSource = 2, - IsAssignmentDestination = 4, - IsAssignmentToRegisterCandidate = 8, - IsAssignmentFromRegisterCandidate = 16, - IsCallArgByImplicitRef = 32, - IsCallArgOnStack = 64, - IsCallRetBuf = 128, - IsReturned = 256, + None = 0, + IsCallArg = 1, + IsAssignmentSource = 2, + IsAssignmentDestination = 4, + IsCallArgByImplicitRef = 8, + IsCallArgOnStack = 16, + IsCallRetBuf = 32, + IsReturned = 64, }; inline constexpr AccessKindFlags operator~(AccessKindFlags a) @@ -196,6 +210,7 @@ inline AccessKindFlags& operator&=(AccessKindFlags& a, AccessKindFlags b) return a = (AccessKindFlags)((uint32_t)a & (uint32_t)b); } +// Tracks all the accesses into one particular struct local. class LocalsUses { jitstd::vector m_accesses; @@ -205,6 +220,17 @@ class LocalsUses { } + //------------------------------------------------------------------------ + // RecordAccess: + // Record an access into this local with the specified offset and access type. + // + // Parameters: + // offs - The offset being accessed + // accessType - The type of the access + // accessLayout - The layout of the access, for accessType == TYP_STRUCT + // flags - Flags classifying the access + // weight - Weight of the block containing the access + // void RecordAccess( unsigned offs, var_types accessType, ClassLayout* accessLayout, AccessKindFlags flags, weight_t weight) { @@ -246,24 +272,12 @@ class LocalsUses { access->CountAssignmentSource++; access->CountAssignmentSourceWtd += weight; - - if ((flags & AccessKindFlags::IsAssignmentToRegisterCandidate) != AccessKindFlags::None) - { - access->CountAssignmentsToRegisterCandidate++; - access->CountAssignmentsToRegisterCandidateWtd += weight; - } } if ((flags & AccessKindFlags::IsAssignmentDestination) != AccessKindFlags::None) { access->CountAssignmentDestination++; access->CountAssignmentDestinationWtd += weight; - - if ((flags & AccessKindFlags::IsAssignmentFromRegisterCandidate) != AccessKindFlags::None) - { - access->CountAssignmentsFromRegisterCandidate++; - access->CountAssignmentsFromRegisterCandidateWtd += weight; - } } if ((flags & AccessKindFlags::IsCallArg) != AccessKindFlags::None) @@ -297,6 +311,16 @@ class LocalsUses } } + //------------------------------------------------------------------------ + // PickPromotions: + // Pick specific replacements to make for this struct local after a set + // of accesses have been recorded. + // + // Parameters: + // comp - Compiler instance + // lclNum - Local num for this struct local + // replacements - [out] Vector to insert replacements into + // void PickPromotions(Compiler* comp, unsigned lclNum, jitstd::vector& replacements) { if (m_accesses.size() <= 0) @@ -335,13 +359,20 @@ class LocalsUses } } + //------------------------------------------------------------------------ + // EvaluateReplacement: + // Evaluate legality and profitability of a single replacement candidate. + // + // Parameters: + // comp - Compiler instance + // lclNum - Local num for this struct local + // access - Access information for the candidate. + // + // Returns: + // True if we should promote this access and create a replacement; otherwise false. + // bool EvaluateReplacement(Compiler* comp, unsigned lclNum, const Access& access) { - unsigned countOverlappedCalls = 0; - unsigned countOverlappedReturns = 0; - unsigned countOverlappedRetbufs = 0; - unsigned countOverlappedAssignmentDestination = 0; - unsigned countOverlappedAssignmentSource = 0; weight_t countOverlappedCallsWtd = 0; weight_t countOverlappedReturnsWtd = 0; weight_t countOverlappedRetbufsWtd = 0; @@ -364,12 +395,6 @@ class LocalsUses return false; } - countOverlappedCalls += otherAccess.CountCallArgs; - countOverlappedReturns += otherAccess.CountReturns; - countOverlappedRetbufs += otherAccess.CountPassedAsRetbuf; - countOverlappedAssignmentDestination += otherAccess.CountAssignmentDestination; - countOverlappedAssignmentSource += otherAccess.CountAssignmentSource; - countOverlappedCallsWtd += otherAccess.CountCallArgsWtd; countOverlappedReturnsWtd += otherAccess.CountReturnsWtd; countOverlappedRetbufsWtd += otherAccess.CountPassedAsRetbufWtd; @@ -377,13 +402,15 @@ class LocalsUses countOverlappedAssignmentSourceWtd += otherAccess.CountAssignmentSourceWtd; } + // TODO-CQ: Tune the following heuristics. Currently they are based on + // x64 code size although using BB weights when available. weight_t costWithout = 0; + // A normal access without promotion looks like: // mov reg, [reg+offs] // It may also be contained. Overall we are going to cost each use of // an unpromoted local at 6.5 bytes. // TODO: We can make much better guesses on what will and won't be contained. - costWithout += access.CountWtd * 6.5; weight_t costWith = 0; @@ -402,7 +429,7 @@ class LocalsUses countReadBacksWtd += countOverlappedRetbufsWtd; countReadBacksWtd += countOverlappedAssignmentDestinationWtd; - // A read back puts the value from stack back to register. We cost it at 5 bytes. + // A read back puts the value from stack back to (hopefully) register. We cost it at 5 bytes. costWith += countReadBacksWtd * 5; // Write backs with TYP_REFs when the base local is an implicit byref @@ -414,9 +441,9 @@ class LocalsUses costWith += countWriteBacksWtd * writeBackCost; JITDUMP("Evaluating access %s @ %03u\n", varTypeName(access.AccessType), access.Offset); - JITDUMP(" Write-back cost: " FMT_WT "\n", writeBackCost); - JITDUMP(" # write backs: " FMT_WT "\n", countWriteBacksWtd); - JITDUMP(" # read backs: " FMT_WT "\n", countReadBacksWtd); + JITDUMP(" Single write-back cost: " FMT_WT "\n", writeBackCost); + JITDUMP(" Write backs: " FMT_WT "\n", countWriteBacksWtd); + JITDUMP(" Read backs: " FMT_WT "\n", countReadBacksWtd); JITDUMP(" Cost with: " FMT_WT "\n", costWith); JITDUMP(" Cost without: " FMT_WT "\n", costWithout); @@ -478,11 +505,12 @@ class LocalsUses #endif }; +// Visitor that records information about uses of struct locals. class LocalsUseVisitor : public GenTreeVisitor { Promotion* m_prom; LocalsUses* m_uses; - BasicBlock* m_curBB; + BasicBlock* m_curBB = nullptr; public: enum @@ -498,11 +526,25 @@ class LocalsUseVisitor : public GenTreeVisitor new (&m_uses[i], jitstd::placement_t()) LocalsUses(prom->m_compiler); } + //------------------------------------------------------------------------ + // SetBB: + // Set current BB we are visiting. Used to get BB weights for access costing. + // + // Parameters: + // bb - The current basic block. + // void SetBB(BasicBlock* bb) { m_curBB = bb; } + //------------------------------------------------------------------------ + // GetUsesByLocal: + // Get the uses information for a specified local. + // + // Parameters: + // bb - The current basic block. + // LocalsUses* GetUsesByLocal(unsigned lcl) { return &m_uses[lcl]; @@ -541,6 +583,18 @@ class LocalsUseVisitor : public GenTreeVisitor return fgWalkResult::WALK_CONTINUE; } +private: + //------------------------------------------------------------------------ + // ClassifyLocalAccess: + // Given a local use and its user, classify information about it. + // + // Parameters: + // lcl - The local + // user - The user of the local. + // + // Returns: + // Flags classifying the access. + // AccessKindFlags ClassifyLocalAccess(GenTreeLclVarCommon* lcl, GenTree* user) { AccessKindFlags flags = AccessKindFlags::None; @@ -595,29 +649,11 @@ class LocalsUseVisitor : public GenTreeVisitor if (user->gtGetOp1() == lcl) { flags |= AccessKindFlags::IsAssignmentDestination; - - if (user->gtGetOp2()->OperIs(GT_LCL_VAR)) - { - LclVarDsc* dsc = m_compiler->lvaGetDesc(user->gtGetOp2()->AsLclVarCommon()); - if ((dsc->TypeGet() != TYP_STRUCT) && !dsc->IsAddressExposed()) - { - flags |= AccessKindFlags::IsAssignmentFromRegisterCandidate; - } - } } if (user->gtGetOp2() == lcl) { flags |= AccessKindFlags::IsAssignmentSource; - - if (user->gtGetOp1()->OperIs(GT_LCL_VAR)) - { - LclVarDsc* dsc = m_compiler->lvaGetDesc(user->gtGetOp1()->AsLclVarCommon()); - if ((dsc->TypeGet() != TYP_STRUCT) && !dsc->IsAddressExposed()) - { - flags |= AccessKindFlags::IsAssignmentToRegisterCandidate; - } - } } } @@ -635,12 +671,7 @@ class ReplaceVisitor : public GenTreeVisitor { Promotion* m_prom; jitstd::vector* m_replacements; - BasicBlock* m_bb; - Statement* m_stmt; - unsigned m_seenAsgs; - unsigned m_seenAsgToLcl; - bool m_madeChanges; - Statement* m_lastStmt; + bool m_madeChanges = false; public: enum @@ -658,44 +689,37 @@ class ReplaceVisitor : public GenTreeVisitor { return m_madeChanges; } - Statement* GetNextStatement() - { - return m_lastStmt->GetNextStmt(); - } - void Reset(BasicBlock* bb, Statement* stmt) + void Reset() { - m_bb = bb; - m_stmt = stmt; - m_seenAsgs = 0; m_madeChanges = false; - m_lastStmt = stmt; } fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { GenTree* tree = *use; - // We handle all cases where we can see struct uses up front. if (tree->OperIs(GT_ASG)) { // Assignments can be decomposed directly into accesses of the replacements. - return DecomposeAssignment(use, user); + DecomposeAssignment((*use)->AsOp(), user); + return fgWalkResult::WALK_CONTINUE; } if (tree->OperIs(GT_CALL)) { // Calls need to store replacements back into the struct local for args // and need to restore replacements from the result (for - // retbufs/returns). Lowering handles optimizing out the unnecessary copies. - return LoadStoreAroundCall(use, user); + // retbufs/returns). + LoadStoreAroundCall((*use)->AsCall(), user); + return fgWalkResult::WALK_CONTINUE; } if (tree->OperIs(GT_RETURN)) { // Returns need to store replacements back into the struct local. - // Lowering will optimize away these copies when possible. - return StoreBeforeReturn((*use)->AsUnOp()); + StoreBeforeReturn((*use)->AsUnOp()); + return fgWalkResult::WALK_CONTINUE; } if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD)) @@ -707,10 +731,16 @@ class ReplaceVisitor : public GenTreeVisitor return fgWalkResult::WALK_CONTINUE; } - fgWalkResult DecomposeAssignment(GenTree** use, GenTree* user) + //------------------------------------------------------------------------ + // DecomposeAssignment: + // Handle an assignment that may be between struct locals with replacements. + // + // Parameters: + // asg - The assignment + // user - The user of the assignment. + // + void DecomposeAssignment(GenTreeOp* asg, GenTree* user) { - GenTreeOp* asg = (*use)->AsOp(); - // TODO-CQ: field-by-field copies and inits. if (asg->gtGetOp2()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) @@ -732,14 +762,19 @@ class ReplaceVisitor : public GenTreeVisitor MarkForReadBack(lhsLcl->GetLclNum(), lhsLcl->GetLclOffs(), size, true); } } - - return fgWalkResult::WALK_CONTINUE; } - fgWalkResult LoadStoreAroundCall(GenTree** use, GenTree* user) + //------------------------------------------------------------------------ + // LoadStoreAroundCall: + // Handle a call that may involve struct local arguments and that may + // pass a struct local with replacements as the retbuf. + // + // Parameters: + // call - The call + // user - The user of the call. + // + void LoadStoreAroundCall(GenTreeCall* call, GenTree* user) { - GenTreeCall* call = (*use)->AsCall(); - CallArg* retBufArg = nullptr; for (CallArg& arg : call->gtArgs.Args()) { @@ -772,10 +807,27 @@ class ReplaceVisitor : public GenTreeVisitor MarkForReadBack(retBufLcl->GetLclNum(), retBufLcl->GetLclOffs(), size); } - - return fgWalkResult::WALK_CONTINUE; } + //------------------------------------------------------------------------ + // ReplaceLocal: + // Handle a local that may need to be replaced. + // + // Parameters: + // use - The use of the local + // user - The user of the local. + // + // Notes: + // This usually amounts to making replacing like + // + // LCL_FLD int V00 [+8] -> LCL_VAR int V10. + // + // In some cases we may have a pending read back, meaning that the + // replacement local is out-of-date compared to the struct local. + // In that case we also need to insert IR to read it back. + // This happens for example if the struct local was just assigned from a + // call or via a block copy. + // void ReplaceLocal(GenTree** use, GenTree* user) { GenTreeLclVarCommon* lcl = (*use)->AsLclVarCommon(); @@ -810,40 +862,47 @@ class ReplaceVisitor : public GenTreeVisitor } #endif - if (accessType != TYP_STRUCT) + if (accessType == TYP_STRUCT) { - size_t index = BinarySearch(replacements, offs); - if ((ssize_t)index >= 0) - { - Replacement& rep = replacements[index]; - assert(accessType == rep.AccessType); - JITDUMP(" ..replaced with promoted lcl V%02u\n", rep.LclNum); - *use = m_compiler->gtNewLclvNode(rep.LclNum, accessType); + // Will be handled once we get to the parent. + return; + } - if ((lcl->gtFlags & GTF_VAR_DEF) != 0) - { - rep.NeedsWriteBack = true; - rep.NeedsReadBack = false; - } - else if (rep.NeedsReadBack) - { - GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); - GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); - *use = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), m_compiler->gtNewAssignNode(dst, src), - *use); - rep.NeedsReadBack = false; - } + size_t index = BinarySearch(replacements, offs); + assert((ssize_t)index >= 0); + Replacement& rep = replacements[index]; + assert(accessType == rep.AccessType); + JITDUMP(" ..replaced with promoted lcl V%02u\n", rep.LclNum); + *use = m_compiler->gtNewLclvNode(rep.LclNum, accessType); - m_madeChanges = true; - } + if ((lcl->gtFlags & GTF_VAR_DEF) != 0) + { + rep.NeedsWriteBack = true; + rep.NeedsReadBack = false; + } + else if (rep.NeedsReadBack) + { + GenTree* dst = m_compiler->gtNewLclvNode(rep.LclNum, rep.AccessType); + GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); + *use = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), m_compiler->gtNewAssignNode(dst, src), *use); + rep.NeedsReadBack = false; } + + m_madeChanges = true; } - fgWalkResult StoreBeforeReturn(GenTreeUnOp* ret) + //------------------------------------------------------------------------ + // StoreBeforeReturn: + // Handle a return of a potential struct local. + // + // Parameters: + // ret - The GT_RETURN node + // + void StoreBeforeReturn(GenTreeUnOp* ret) { if (ret->TypeIs(TYP_VOID) || !ret->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { - return fgWalkResult::WALK_CONTINUE; + return; } GenTreeLclVarCommon* retLcl = ret->gtGetOp1()->AsLclVarCommon(); @@ -852,11 +911,19 @@ class ReplaceVisitor : public GenTreeVisitor unsigned size = retLcl->GetLayout(m_compiler)->GetSize(); WriteBackBefore(&ret->gtOp1, retLcl->GetLclNum(), retLcl->GetLclOffs(), size); } - - return fgWalkResult::WALK_CONTINUE; } - // Write back all overlapping replacement fields in the specified range. + //------------------------------------------------------------------------ + // WriteBackBefore: + // Update the use with IR that writes back all necessary overlapping + // replacements into a struct local. + // + // Parameters: + // use - The use, which will be updated with a cascasing comma trees of assignments + // lcl - The struct local + // offs - The starting offset into the struct local of the overlapping range to write back to + // size - The size of the overlapping range + // void WriteBackBefore(GenTree** use, unsigned lcl, unsigned offs, unsigned size) { jitstd::vector replacements = m_replacements[lcl]; @@ -892,6 +959,19 @@ class ReplaceVisitor : public GenTreeVisitor } } + //------------------------------------------------------------------------ + // MarkForReadBack: + // Mark that replacements in the specified struct locals need to be read + // back before their next use. + // + // Parameters: + // lcl - The struct local + // offs - The starting offset of the range in the struct local that needs to be read back from. + // size - The size of the range + // conservative - Whether this is a potentially conservative read back + // that we can handle more efficiently in the future (only used for + // logging purposes) + // void MarkForReadBack(unsigned lcl, unsigned offs, unsigned size, bool conservative = false) { jitstd::vector& replacements = m_replacements[lcl]; @@ -917,13 +997,20 @@ class ReplaceVisitor : public GenTreeVisitor if (conservative) { - JITDUMP("*** NYI: Conservatively marking as read-back\n"); + JITDUMP("*** NYI: Conservatively marked as read-back\n"); conservative = false; } } } }; +//------------------------------------------------------------------------ +// Promotion::Run: +// Run the promotion phase. +// +// Returns: +// Suitable phase status. +// PhaseStatus Promotion::Run() { if (m_compiler->lvaCount <= 0) @@ -931,6 +1018,7 @@ PhaseStatus Promotion::Run() return PhaseStatus::MODIFIED_NOTHING; } + // First collect information about uses of locals LocalsUseVisitor localsUse(this); for (BasicBlock* bb : m_compiler->Blocks()) { @@ -955,6 +1043,7 @@ PhaseStatus Promotion::Run() } #endif + // Pick promotion based on the use information we just collected. bool anyReplacements = false; jitstd::vector* replacements = reinterpret_cast*>( new (m_compiler, CMK_Promotion) char[sizeof(jitstd::vector) * m_compiler->lvaCount]); @@ -964,15 +1053,18 @@ PhaseStatus Promotion::Run() jitstd::vector(m_compiler->getAllocator(CMK_Promotion)); localsUse.GetUsesByLocal(i)->PickPromotions(m_compiler, i, replacements[i]); + if (replacements[i].size() > 0) { + anyReplacements = true; +#ifdef DEBUG JITDUMP("V%02u promoted with %d replacements\n", i, (int)replacements[i].size()); for (const Replacement& rep : replacements[i]) { JITDUMP(" [%03u..%03u) promoted as %s V%02u\n", rep.Offset, rep.Offset + genTypeSize(rep.AccessType), varTypeName(rep.AccessType), rep.LclNum); - anyReplacements = true; } +#endif } } @@ -984,10 +1076,10 @@ PhaseStatus Promotion::Run() ReplaceVisitor replacer(this, replacements); for (BasicBlock* bb : m_compiler->Blocks()) { - for (Statement* stmt = bb->firstStmt(); stmt != nullptr;) + for (Statement* stmt : bb->Statements()) { DISPSTMT(stmt); - replacer.Reset(bb, stmt); + replacer.Reset(); replacer.WalkTree(stmt->GetRootNodePointer(), nullptr); if (replacer.MadeChanges()) @@ -996,8 +1088,6 @@ PhaseStatus Promotion::Run() JITDUMP("New statement:\n"); DISPSTMT(stmt); } - - stmt = replacer.GetNextStatement(); } for (unsigned i = 0; i < numLocals; i++) @@ -1045,6 +1135,15 @@ PhaseStatus Promotion::Run() return PhaseStatus::MODIFIED_EVERYTHING; } +//------------------------------------------------------------------------ +// Promotion::InsertInitialReadBack: +// Insert IR to initially read a struct local's value into its promoted field locals. +// +// Parameters: +// lclNum - The struct local +// replacements - Replacements for the struct local +// prevStmt - [in, out] Previous statement to insert after +// void Promotion::InsertInitialReadBack(unsigned lclNum, const jitstd::vector& replacements, Statement** prevStmt) @@ -1060,6 +1159,15 @@ void Promotion::InsertInitialReadBack(unsigned lclNum, } } +//------------------------------------------------------------------------ +// Promotion::ExplicitlyZeroInitReplacementLocals: +// Insert IR to zero out replacement locals if necessary. +// +// Parameters: +// lclNum - The struct local +// replacements - Replacements for the struct local +// prevStmt - [in, out] Previous statement to insert after +// void Promotion::ExplicitlyZeroInitReplacementLocals(unsigned lclNum, const jitstd::vector& replacements, Statement** prevStmt) @@ -1083,6 +1191,15 @@ void Promotion::ExplicitlyZeroInitReplacementLocals(unsigned } } +//------------------------------------------------------------------------ +// Promotion::InsertInitStatement: +// Insert a new statement after the specified statement in the scratch block, +// or at the beginning of the scratch block if no other statements were +// +// Parameters: +// prevStmt - [in, out] Previous statement to insert after +// tree - Tree to create statement from +// void Promotion::InsertInitStatement(Statement** prevStmt, GenTree* tree) { m_compiler->fgEnsureFirstBBisScratch(); From 3a3950d4adf739b783d975bb385db115edfcaa1c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 29 Mar 2023 15:45:51 +0200 Subject: [PATCH 35/52] Fix assert, fix visit order for LHS of ASGs --- src/coreclr/jit/promotion.cpp | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 741a1dab369b2..b7870ea6386d8 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -20,10 +20,10 @@ PhaseStatus Compiler::GeneralizedPromotion() return PhaseStatus::MODIFIED_NOTHING; } - if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) - { - return PhaseStatus::MODIFIED_NOTHING; - } + //if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) + //{ + // return PhaseStatus::MODIFIED_NOTHING; + //} #ifdef DEBUG static ConfigMethodRange s_range; @@ -701,6 +701,13 @@ class ReplaceVisitor : public GenTreeVisitor if (tree->OperIs(GT_ASG)) { + // If LHS of the ASG was a local then we skipped it as we don't + // want to see it until after the RHS. + if (tree->gtGetOp1()->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + ReplaceLocal(&tree->AsOp()->gtOp1, tree); + } + // Assignments can be decomposed directly into accesses of the replacements. DecomposeAssignment((*use)->AsOp(), user); return fgWalkResult::WALK_CONTINUE; @@ -722,7 +729,9 @@ class ReplaceVisitor : public GenTreeVisitor return fgWalkResult::WALK_CONTINUE; } - if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + // Skip the local on the LHS of ASGs when we see it in the normal tree + // visit; we handle it as part of the parent ASG instead. + if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && ((user == nullptr) || !user->OperIs(GT_ASG) || (user->gtGetOp1() != tree))) { ReplaceLocal(use, user); return fgWalkResult::WALK_CONTINUE; @@ -869,7 +878,12 @@ class ReplaceVisitor : public GenTreeVisitor } size_t index = BinarySearch(replacements, offs); - assert((ssize_t)index >= 0); + if ((ssize_t)index < 0) + { + // Access that we don't have a replacement for. + return; + } + Replacement& rep = replacements[index]; assert(accessType == rep.AccessType); JITDUMP(" ..replaced with promoted lcl V%02u\n", rep.LclNum); From 9a36a5a21f39bb6291d80b8be9c0bf0f85f53874 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 29 Mar 2023 15:47:51 +0200 Subject: [PATCH 36/52] Run jit-format --- src/coreclr/jit/promotion.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index b7870ea6386d8..1d01d56969b39 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -20,10 +20,10 @@ PhaseStatus Compiler::GeneralizedPromotion() return PhaseStatus::MODIFIED_NOTHING; } - //if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) - //{ - // return PhaseStatus::MODIFIED_NOTHING; - //} +// if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) +//{ +// return PhaseStatus::MODIFIED_NOTHING; +//} #ifdef DEBUG static ConfigMethodRange s_range; @@ -731,7 +731,8 @@ class ReplaceVisitor : public GenTreeVisitor // Skip the local on the LHS of ASGs when we see it in the normal tree // visit; we handle it as part of the parent ASG instead. - if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && ((user == nullptr) || !user->OperIs(GT_ASG) || (user->gtGetOp1() != tree))) + if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && + ((user == nullptr) || !user->OperIs(GT_ASG) || (user->gtGetOp1() != tree))) { ReplaceLocal(use, user); return fgWalkResult::WALK_CONTINUE; From 9424e6f7afb885a441f5dea9210fb1db8cd226c5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 29 Mar 2023 16:09:29 +0200 Subject: [PATCH 37/52] Create some data structures lazily --- src/coreclr/jit/promotion.cpp | 137 +++++++++++++++++++++++----------- 1 file changed, 94 insertions(+), 43 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 1d01d56969b39..be06312ef33da 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -78,7 +78,6 @@ struct Access } bool Overlaps(unsigned otherStart, unsigned otherSize) const - { unsigned end = Offset + GetAccessSize(); if (end <= otherStart) @@ -211,12 +210,12 @@ inline AccessKindFlags& operator&=(AccessKindFlags& a, AccessKindFlags b) } // Tracks all the accesses into one particular struct local. -class LocalsUses +class LocalUses { jitstd::vector m_accesses; public: - LocalsUses(Compiler* comp) : m_accesses(comp->getAllocator(CMK_Promotion)) + LocalUses(Compiler* comp) : m_accesses(comp->getAllocator(CMK_Promotion)) { } @@ -319,16 +318,16 @@ class LocalsUses // Parameters: // comp - Compiler instance // lclNum - Local num for this struct local - // replacements - [out] Vector to insert replacements into + // replacements - [out] Pointer to vector to create and insert replacements into // - void PickPromotions(Compiler* comp, unsigned lclNum, jitstd::vector& replacements) + void PickPromotions(Compiler* comp, unsigned lclNum, jitstd::vector** replacements) { if (m_accesses.size() <= 0) { return; } - assert(replacements.size() == 0); + assert(*replacements == nullptr); for (size_t i = 0; i < m_accesses.size(); i++) { const Access& access = m_accesses[i]; @@ -355,7 +354,13 @@ class LocalsUses LclVarDsc* dsc = comp->lvaGetDesc(newLcl); dsc->lvType = access.AccessType; - replacements.push_back(Replacement(access.Offset, access.AccessType, newLcl)); + if (*replacements == nullptr) + { + *replacements = + new (comp, CMK_Promotion) jitstd::vector(comp->getAllocator(CMK_Promotion)); + } + + (*replacements)->push_back(Replacement(access.Offset, access.AccessType, newLcl)); } } @@ -509,7 +514,7 @@ class LocalsUses class LocalsUseVisitor : public GenTreeVisitor { Promotion* m_prom; - LocalsUses* m_uses; + LocalUses** m_uses; BasicBlock* m_curBB = nullptr; public: @@ -520,10 +525,7 @@ class LocalsUseVisitor : public GenTreeVisitor LocalsUseVisitor(Promotion* prom) : GenTreeVisitor(prom->m_compiler), m_prom(prom) { - m_uses = reinterpret_cast( - new (prom->m_compiler, CMK_Promotion) char[prom->m_compiler->lvaCount * sizeof(LocalsUses)]); - for (size_t i = 0; i < prom->m_compiler->lvaCount; i++) - new (&m_uses[i], jitstd::placement_t()) LocalsUses(prom->m_compiler); + m_uses = new (prom->m_compiler, CMK_Promotion) LocalUses*[prom->m_compiler->lvaCount]{}; } //------------------------------------------------------------------------ @@ -545,9 +547,13 @@ class LocalsUseVisitor : public GenTreeVisitor // Parameters: // bb - The current basic block. // - LocalsUses* GetUsesByLocal(unsigned lcl) + // Returns: + // Information about uses, or null if this local has no uses information + // associated with it. + // + LocalUses* GetUsesByLocal(unsigned lcl) { - return &m_uses[lcl]; + return m_uses[lcl]; } fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) @@ -574,8 +580,8 @@ class LocalsUseVisitor : public GenTreeVisitor var_types accessType = lcl->TypeGet(); ClassLayout* accessLayout = accessType == TYP_STRUCT ? lcl->GetLayout(m_compiler) : nullptr; AccessKindFlags accessFlags = ClassifyLocalAccess(lcl, user); - m_uses[lcl->GetLclNum()].RecordAccess(offs, accessType, accessLayout, accessFlags, - m_curBB->getBBWeight(m_compiler)); + GetOrCreateUses(lcl->GetLclNum()) + ->RecordAccess(offs, accessType, accessLayout, accessFlags, m_curBB->getBBWeight(m_compiler)); } } } @@ -584,6 +590,25 @@ class LocalsUseVisitor : public GenTreeVisitor } private: + //------------------------------------------------------------------------ + // GetOrCreateUses: + // Get the uses information for a local. Create it if it does not already exist. + // + // Parameters: + // lclNum - The local + // + // Returns: + // Uses information. + // + LocalUses* GetOrCreateUses(unsigned lclNum) + { + if (m_uses[lclNum] == nullptr) + { + m_uses[lclNum] = new (m_compiler, CMK_Promotion) LocalUses(m_compiler); + } + + return m_uses[lclNum]; + } //------------------------------------------------------------------------ // ClassifyLocalAccess: // Given a local use and its user, classify information about it. @@ -669,9 +694,9 @@ class LocalsUseVisitor : public GenTreeVisitor class ReplaceVisitor : public GenTreeVisitor { - Promotion* m_prom; - jitstd::vector* m_replacements; - bool m_madeChanges = false; + Promotion* m_prom; + jitstd::vector** m_replacements; + bool m_madeChanges = false; public: enum @@ -680,7 +705,7 @@ class ReplaceVisitor : public GenTreeVisitor UseExecutionOrder = true, }; - ReplaceVisitor(Promotion* prom, jitstd::vector* replacements) + ReplaceVisitor(Promotion* prom, jitstd::vector** replacements) : GenTreeVisitor(prom->m_compiler), m_prom(prom), m_replacements(replacements) { } @@ -840,15 +865,15 @@ class ReplaceVisitor : public GenTreeVisitor // void ReplaceLocal(GenTree** use, GenTree* user) { - GenTreeLclVarCommon* lcl = (*use)->AsLclVarCommon(); - unsigned lclNum = lcl->GetLclNum(); - jitstd::vector& replacements = m_replacements[lclNum]; - - if (replacements.size() <= 0) + GenTreeLclVarCommon* lcl = (*use)->AsLclVarCommon(); + unsigned lclNum = lcl->GetLclNum(); + if (m_replacements[lclNum] == nullptr) { return; } + jitstd::vector& replacements = *m_replacements[lclNum]; + unsigned offs = lcl->GetLclOffs(); var_types accessType = lcl->TypeGet(); @@ -941,8 +966,13 @@ class ReplaceVisitor : public GenTreeVisitor // void WriteBackBefore(GenTree** use, unsigned lcl, unsigned offs, unsigned size) { - jitstd::vector replacements = m_replacements[lcl]; - size_t index = BinarySearch(replacements, offs); + if (m_replacements[lcl] == nullptr) + { + return; + } + + jitstd::vector& replacements = *m_replacements[lcl]; + size_t index = BinarySearch(replacements, offs); if ((ssize_t)index < 0) { @@ -989,7 +1019,12 @@ class ReplaceVisitor : public GenTreeVisitor // void MarkForReadBack(unsigned lcl, unsigned offs, unsigned size, bool conservative = false) { - jitstd::vector& replacements = m_replacements[lcl]; + if (m_replacements[lcl] == nullptr) + { + return; + } + + jitstd::vector& replacements = *m_replacements[lcl]; size_t index = BinarySearch(replacements, offs); if ((ssize_t)index < 0) @@ -1052,29 +1087,36 @@ PhaseStatus Promotion::Run() { for (unsigned lcl = 0; lcl < m_compiler->lvaCount; lcl++) { - LocalsUses* uses = localsUse.GetUsesByLocal(lcl); - uses->Dump(lcl); + LocalUses* uses = localsUse.GetUsesByLocal(lcl); + if (uses != nullptr) + { + uses->Dump(lcl); + } } } #endif // Pick promotion based on the use information we just collected. - bool anyReplacements = false; - jitstd::vector* replacements = reinterpret_cast*>( - new (m_compiler, CMK_Promotion) char[sizeof(jitstd::vector) * m_compiler->lvaCount]); + bool anyReplacements = false; + jitstd::vector** replacements = + new (m_compiler, CMK_Promotion) jitstd::vector*[m_compiler->lvaCount]{}; for (unsigned i = 0; i < numLocals; i++) { - new (&replacements[i], jitstd::placement_t()) - jitstd::vector(m_compiler->getAllocator(CMK_Promotion)); + LocalUses* uses = localsUse.GetUsesByLocal(i); + if (uses == nullptr) + { + continue; + } - localsUse.GetUsesByLocal(i)->PickPromotions(m_compiler, i, replacements[i]); + uses->PickPromotions(m_compiler, i, &replacements[i]); - if (replacements[i].size() > 0) + if (replacements[i] != nullptr) { + assert(replacements[i]->size() > 0); anyReplacements = true; #ifdef DEBUG - JITDUMP("V%02u promoted with %d replacements\n", i, (int)replacements[i].size()); - for (const Replacement& rep : replacements[i]) + JITDUMP("V%02u promoted with %d replacements\n", i, (int)replacements[i]->size()); + for (const Replacement& rep : *replacements[i]) { JITDUMP(" [%03u..%03u) promoted as %s V%02u\n", rep.Offset, rep.Offset + genTypeSize(rep.AccessType), varTypeName(rep.AccessType), rep.LclNum); @@ -1107,9 +1149,13 @@ PhaseStatus Promotion::Run() for (unsigned i = 0; i < numLocals; i++) { - for (unsigned j = 0; j < replacements[i].size(); j++) + if (replacements[i] == nullptr) + { + continue; + } + + for (Replacement& rep : *replacements[i]) { - Replacement& rep = replacements[i][j]; assert(!rep.NeedsReadBack || !rep.NeedsWriteBack); if (rep.NeedsReadBack) { @@ -1131,10 +1177,15 @@ PhaseStatus Promotion::Run() Statement* prevStmt = nullptr; for (unsigned lclNum = 0; lclNum < numLocals; lclNum++) { + if (replacements[lclNum] == nullptr) + { + continue; + } + LclVarDsc* dsc = m_compiler->lvaGetDesc(lclNum); if (dsc->lvIsParam || dsc->lvIsOSRLocal) { - InsertInitialReadBack(lclNum, replacements[lclNum], &prevStmt); + InsertInitialReadBack(lclNum, *replacements[lclNum], &prevStmt); } else if (dsc->lvSuppressedZeroInit) { @@ -1143,7 +1194,7 @@ PhaseStatus Promotion::Run() // Now that we are promoting some fields that assumption may be // invalidated for those fields, and we may need to insert explicit // zero inits again. - ExplicitlyZeroInitReplacementLocals(lclNum, replacements[lclNum], &prevStmt); + ExplicitlyZeroInitReplacementLocals(lclNum, *replacements[lclNum], &prevStmt); } } From 7424c03c7a8e67b0b2f1cfc02c4eff8a05d16e59 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 30 Mar 2023 00:50:23 +0200 Subject: [PATCH 38/52] Add GTF_DEBUG_NODE_MORPHED properly in new block morph --- src/coreclr/jit/morphblock.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index 9c9ae0952b12b..0bbd2e9d1d3b0 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -144,7 +144,9 @@ GenTree* MorphInitBlockHelper::Morph() assert(m_result != nullptr); #ifdef DEBUG - if (m_result != m_asg) + // If we are going to return a different node than the input then morph + // expects us to have set GTF_DEBUG_NODE_MORPHED. + if ((m_result != m_asg) || (sideEffects != nullptr)) { m_result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; } @@ -158,6 +160,7 @@ GenTree* MorphInitBlockHelper::Morph() commaPool = commaPool->gtNext; assert(comma->OperIs(GT_COMMA)); + comma->gtType = m_result->TypeGet(); comma->AsOp()->gtOp1 = sideEffects; comma->AsOp()->gtOp2 = m_result; comma->gtFlags = (sideEffects->gtFlags | m_result->gtFlags) & GTF_ALL_EFFECT; From 1305aad0c3e8054a03bafed9d1935a391318c150 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 30 Mar 2023 01:17:54 +0200 Subject: [PATCH 39/52] Update statement side effects on replacer changes --- src/coreclr/jit/promotion.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index be06312ef33da..3c02993f027f8 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1142,6 +1142,7 @@ PhaseStatus Promotion::Run() if (replacer.MadeChanges()) { m_compiler->fgSequenceLocals(stmt); + m_compiler->gtUpdateStmtSideEffects(stmt); JITDUMP("New statement:\n"); DISPSTMT(stmt); } From 9709b7b9e082ff89299b84dd29483970a16b5285 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 30 Mar 2023 15:51:07 +0200 Subject: [PATCH 40/52] Skip local copy prop assertions in problematic case --- src/coreclr/jit/assertionprop.cpp | 16 ++++++++++++++++ src/coreclr/jit/compiler.h | 3 +++ src/coreclr/jit/morphblock.cpp | 2 +- src/coreclr/jit/promotion.cpp | 25 +++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 7c53a70f2920f..35cc1ee2f080a 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -1256,6 +1256,22 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, goto DONE_ASSERTION; // Don't make an assertion } + // We process locals when we see the LCL_VAR node instead + // of at its actual use point (its parent). That opens us + // up to problems in a case like the following, assuming we + // allowed creating an assertion like V10 = V35: + // + // └──▌ ADD int + // ├──▌ LCL_VAR int V10 tmp6 -> copy propagated to [V35 tmp31] + // └──▌ COMMA int + // ├──▌ ASG int + // │ ├──▌ LCL_VAR int V35 tmp31 + // │ └──▌ LCL_FLD int V03 loc1 [+4] + if (lclVar2->lvRedefinedInEmbeddedStatement) + { + goto DONE_ASSERTION; // Don't make an assertion + } + assertion.op2.kind = O2K_LCLVAR_COPY; assertion.op2.vn = optConservativeNormalVN(op2); assertion.op2.lcl.lclNum = lclNum2; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2fac604a25f51..cb6a52e7f6965 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -653,6 +653,9 @@ class LclVarDsc unsigned char lvIsOSRLocal : 1; // Root method local in an OSR method. Any stack home will be on the Tier0 frame. // Initial value will be defined by Tier0. Requires special handing in prolog. + unsigned char lvRedefinedInEmbeddedStatement : 1; // Local has redefinitions inside embedded statements that + // disqualify it from local copy prop. + private: unsigned char lvIsNeverNegative : 1; // The local is known to be never negative diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index 0bbd2e9d1d3b0..545b0542f2c56 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -146,7 +146,7 @@ GenTree* MorphInitBlockHelper::Morph() #ifdef DEBUG // If we are going to return a different node than the input then morph // expects us to have set GTF_DEBUG_NODE_MORPHED. - if ((m_result != m_asg) || (sideEffects != nullptr)) + if ((m_result != m_asg) /* || (sideEffects != nullptr)*/) { m_result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; } diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 3c02993f027f8..b7f0aeaf74856 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -926,6 +926,31 @@ class ReplaceVisitor : public GenTreeVisitor GenTree* src = m_compiler->gtNewLclFldNode(lclNum, rep.AccessType, rep.Offset); *use = m_compiler->gtNewOperNode(GT_COMMA, (*use)->TypeGet(), m_compiler->gtNewAssignNode(dst, src), *use); rep.NeedsReadBack = false; + + // TODO-CQ: Local copy prop does not take into account that the + // uses of LCL_VAR occur at the user, which means it may introduce + // illegally overlapping lifetimes, such as: + // + // └──▌ ADD int + // ├──▌ LCL_VAR int V10 tmp6 -> copy propagated to [V35 tmp31] + // └──▌ COMMA int + // ├──▌ ASG int + // │ ├──▌ LCL_VAR int V35 tmp31 + // │ └──▌ LCL_FLD int V03 loc1 [+4] + // This really ought to be handled by local copy prop, but the way it works during + // morph makes it hard to fix there. + // + // This is the short term fix. Long term fixes may be: + // 1. Fix local copy prop + // 2. Teach LSRA to allow the above cases, simplifying IR concepts (e.g. + // introduce something like GT_COPY on top of LCL_VAR when they + // need to be "defs") + // 3. Change the pass here to avoid creating any embedded assignments by making use + // of gtSplitTree. We will only need to split in very edge cases since the point + // at which the replacement was marked as needing read back is practically always + // going to be in a previous statement, so this shouldn't be too bad for CQ. + + m_compiler->lvaGetDesc(rep.LclNum)->lvRedefinedInEmbeddedStatement = true; } m_madeChanges = true; From a22b62f2edad4db56498f71b4b691dd8e15ead08 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 30 Mar 2023 22:29:15 +0200 Subject: [PATCH 41/52] Undo accidental reversion --- src/coreclr/jit/morphblock.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index 545b0542f2c56..0bbd2e9d1d3b0 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -146,7 +146,7 @@ GenTree* MorphInitBlockHelper::Morph() #ifdef DEBUG // If we are going to return a different node than the input then morph // expects us to have set GTF_DEBUG_NODE_MORPHED. - if ((m_result != m_asg) /* || (sideEffects != nullptr)*/) + if ((m_result != m_asg) || (sideEffects != nullptr)) { m_result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; } From 7595039dc1808d835a3ddb3e54c57f8a10f020d1 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 31 Mar 2023 11:01:00 +0200 Subject: [PATCH 42/52] Fix treatment of retbufs --- src/coreclr/jit/promotion.cpp | 91 ++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index b7f0aeaf74856..25b7236ffde26 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -415,7 +415,7 @@ class LocalUses // mov reg, [reg+offs] // It may also be contained. Overall we are going to cost each use of // an unpromoted local at 6.5 bytes. - // TODO: We can make much better guesses on what will and won't be contained. + // TODO-CQ: We can make much better guesses on what will and won't be contained. costWithout += access.CountWtd * 6.5; weight_t costWith = 0; @@ -560,29 +560,35 @@ class LocalsUseVisitor : public GenTreeVisitor { GenTree* tree = *use; - if (tree->OperIsLocal()) + if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) { GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); LclVarDsc* dsc = m_compiler->lvaGetDesc(lcl); if (!dsc->lvPromoted && (dsc->TypeGet() == TYP_STRUCT) && !dsc->IsAddressExposed()) { + var_types accessType; + ClassLayout* accessLayout; + AccessKindFlags accessFlags; + if (lcl->OperIsLocalAddr()) { assert(user->OperIs(GT_CALL) && dsc->IsHiddenBufferStructArg() && (user->AsCall()->gtArgs.GetRetBufferArg()->GetNode() == lcl)); - // TODO: We should record that this is used as the address - // of a retbuf -- it makes promotion less desirable as we - // have to reload fields back from the retbuf. + + accessType = TYP_STRUCT; + accessLayout = m_compiler->typGetObjLayout(user->AsCall()->gtRetClsHnd); + accessFlags = AccessKindFlags::IsCallRetBuf; } else { - unsigned offs = lcl->GetLclOffs(); - var_types accessType = lcl->TypeGet(); - ClassLayout* accessLayout = accessType == TYP_STRUCT ? lcl->GetLayout(m_compiler) : nullptr; - AccessKindFlags accessFlags = ClassifyLocalAccess(lcl, user); - GetOrCreateUses(lcl->GetLclNum()) - ->RecordAccess(offs, accessType, accessLayout, accessFlags, m_curBB->getBBWeight(m_compiler)); + accessType = lcl->TypeGet(); + accessLayout = accessType == TYP_STRUCT ? lcl->GetLayout(m_compiler) : nullptr; + accessFlags = ClassifyLocalRead(lcl, user); } + + LocalUses* uses = GetOrCreateUses(lcl->GetLclNum()); + unsigned offs = lcl->GetLclOffs(); + uses->RecordAccess(offs, accessType, accessLayout, accessFlags, m_curBB->getBBWeight(m_compiler)); } } @@ -620,52 +626,47 @@ class LocalsUseVisitor : public GenTreeVisitor // Returns: // Flags classifying the access. // - AccessKindFlags ClassifyLocalAccess(GenTreeLclVarCommon* lcl, GenTree* user) + AccessKindFlags ClassifyLocalRead(GenTreeLclVarCommon* lcl, GenTree* user) { + assert(lcl->OperIsLocalRead()); + AccessKindFlags flags = AccessKindFlags::None; if (user->IsCall()) { - GenTreeCall* call = user->AsCall(); - if (call->IsOptimizingRetBufAsLocal() && (m_compiler->gtCallGetDefinedRetBufLclAddr(call) == lcl)) - { - flags |= AccessKindFlags::IsCallRetBuf; - } - else + GenTreeCall* call = user->AsCall(); + unsigned argIndex = 0; + for (CallArg& arg : call->gtArgs.Args()) { - unsigned argIndex = 0; - for (CallArg& arg : call->gtArgs.Args()) + if (arg.GetNode() != lcl) { - if (arg.GetNode() != lcl) - { - argIndex++; - continue; - } + argIndex++; + continue; + } - flags |= AccessKindFlags::IsCallArg; + flags |= AccessKindFlags::IsCallArg; - unsigned argSize = 0; - if (arg.GetSignatureType() != TYP_STRUCT) - { - argSize = genTypeSize(arg.GetSignatureType()); - } - else - { - argSize = m_compiler->typGetObjLayout(arg.GetSignatureClassHandle())->GetSize(); - } + unsigned argSize = 0; + if (arg.GetSignatureType() != TYP_STRUCT) + { + argSize = genTypeSize(arg.GetSignatureType()); + } + else + { + argSize = m_compiler->typGetObjLayout(arg.GetSignatureClassHandle())->GetSize(); + } #ifdef WINDOWS_AMD64_ABI - if ((argSize != 1) && (argSize != 2) && (argSize != 4) && (argSize != 8)) - { - flags |= AccessKindFlags::IsCallArgByImplicitRef; - } + if ((argSize != 1) && (argSize != 2) && (argSize != 4) && (argSize != 8)) + { + flags |= AccessKindFlags::IsCallArgByImplicitRef; + } - if (argIndex >= 4) - { - flags |= AccessKindFlags::IsCallArgOnStack; - } -#endif - break; + if (argIndex >= 4) + { + flags |= AccessKindFlags::IsCallArgOnStack; } +#endif + break; } } From 585a85a7b6c0ad8cd5c05d2d943ca03d55efb1c0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 31 Mar 2023 11:03:42 +0200 Subject: [PATCH 43/52] Adjust some stress frequencies --- src/coreclr/jit/morph.cpp | 2 +- src/coreclr/jit/promotion.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2e56346fe65a3..f13dd8fefc6cf 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14750,7 +14750,7 @@ PhaseStatus Compiler::fgPromoteStructs() } #ifdef DEBUG - if (compStressCompile(STRESS_NO_OLD_PROMOTION, 20)) + if (compStressCompile(STRESS_NO_OLD_PROMOTION, 10)) { JITDUMP(" skipping due to stress\n"); return PhaseStatus::MODIFIED_NOTHING; diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 25b7236ffde26..275243b2b0488 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -20,10 +20,10 @@ PhaseStatus Compiler::GeneralizedPromotion() return PhaseStatus::MODIFIED_NOTHING; } -// if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) -//{ -// return PhaseStatus::MODIFIED_NOTHING; -//} + if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) + { + return PhaseStatus::MODIFIED_NOTHING; + } #ifdef DEBUG static ConfigMethodRange s_range; From e8ebbb55a5fe180ca9286f3f1fe85071aef8edc5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 4 Apr 2023 15:02:50 +0200 Subject: [PATCH 44/52] Fix incorrect assert --- src/coreclr/jit/promotion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 275243b2b0488..7142707fbe8f2 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1066,7 +1066,7 @@ class ReplaceVisitor : public GenTreeVisitor while ((index < replacements.size()) && (replacements[index].Offset < end)) { Replacement& rep = replacements[index]; - assert((rep.Offset >= offs) && (rep.Offset + genTypeSize(rep.AccessType) <= end)); + assert(rep.Overlaps(offs, size)); rep.NeedsReadBack = true; rep.NeedsWriteBack = false; index++; From 7c8e1305cc930cef34687f291f47e9f0eace1fc6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 4 Apr 2023 15:42:54 +0200 Subject: [PATCH 45/52] Rename pass to "physical promotion" --- src/coreclr/jit/compiler.cpp | 4 ++-- src/coreclr/jit/compiler.h | 6 +++--- src/coreclr/jit/compphases.h | 2 +- src/coreclr/jit/jitconfigvalues.h | 2 +- src/coreclr/jit/promotion.cpp | 10 +++++----- src/tests/Common/testenvironment.proj | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 4b5d043ddbc91..5ebcc227ae239 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4713,9 +4713,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_EARLY_LIVENESS, &Compiler::fgEarlyLiveness); - // Promote more struct locals + // Promote struct locals based on primitive access patterns // - DoPhase(this, PHASE_GENERALIZED_PROMOTION, &Compiler::GeneralizedPromotion); + DoPhase(this, PHASE_PHYSICAL_PROMOTION, &Compiler::PhysicalPromotion); // Run a simple forward substitution pass. // diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b3ea3bc3327fb..f2111e77dae8a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6100,7 +6100,7 @@ class Compiler PhaseStatus fgMarkAddressExposedLocals(); void fgSequenceLocals(Statement* stmt); - PhaseStatus GeneralizedPromotion(); + PhaseStatus PhysicalPromotion(); PhaseStatus fgForwardSub(); bool fgForwardSubBlock(BasicBlock* block); @@ -9860,8 +9860,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX STRESS_MODE(SPLIT_TREES_RANDOMLY) /* Split all statements at a random tree */ \ STRESS_MODE(SPLIT_TREES_REMOVE_COMMAS) /* Remove all GT_COMMA nodes */ \ STRESS_MODE(NO_OLD_PROMOTION) /* Do not use old promotion */ \ - STRESS_MODE(GENERALIZED_PROMOTION) /* Use generalized promotion */ \ - STRESS_MODE(GENERALIZED_PROMOTION_COST) \ + STRESS_MODE(PHYSICAL_PROMOTION) /* Use physical promotion */ \ + STRESS_MODE(PHYSICAL_PROMOTION_COST) \ \ /* After COUNT_VARN, stress level 2 does all of these all the time */ \ \ diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 6936a0e03a0d0..808742b4f0116 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -43,7 +43,7 @@ CompPhaseNameMacro(PHASE_UPDATE_FINALLY_FLAGS, "Update finally target flag CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH, "Update flow graph early pass", false, -1, false) CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", false, -1, false) CompPhaseNameMacro(PHASE_EARLY_LIVENESS, "Early liveness", false, -1, false) -CompPhaseNameMacro(PHASE_GENERALIZED_PROMOTION, "Generalized promotion", false, -1, false) +CompPhaseNameMacro(PHASE_PHYSICAL_PROMOTION, "Physical promotion", false, -1, false) CompPhaseNameMacro(PHASE_FWD_SUB, "Forward Substitution", false, -1, false) CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", false, -1, false) CompPhaseNameMacro(PHASE_PROMOTE_STRUCTS, "Morph - Promote Structs", false, -1, false) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index ad75a8a274411..d38d0e992d7d9 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -431,7 +431,7 @@ CONFIG_STRING(JitEnableVNBasedDeadStoreRemovalRange, W("JitEnableVNBasedDeadStor CONFIG_STRING(JitEnableEarlyLivenessRange, W("JitEnableEarlyLivenessRange")) CONFIG_STRING(JitOnlyOptimizeRange, W("JitOnlyOptimizeRange")) // If set, all methods that do _not_ match are forced into MinOpts -CONFIG_STRING(JitEnableGeneralizedPromotionRange, W("JitEnableGeneralizedPromotionRange")) +CONFIG_STRING(JitEnablePhysicalPromotionRange, W("JitEnablePhysicalPromotionRange")) CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 7142707fbe8f2..ede3fa26995a2 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -3,12 +3,12 @@ #include "jitstd/algorithm.h" //------------------------------------------------------------------------ -// GeneralizedPromotion: Promote structs based on primitive access patterns. +// PhysicalPromotion: Promote structs based on primitive access patterns. // // Returns: // Suitable phase status. // -PhaseStatus Compiler::GeneralizedPromotion() +PhaseStatus Compiler::PhysicalPromotion() { if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE)) { @@ -20,14 +20,14 @@ PhaseStatus Compiler::GeneralizedPromotion() return PhaseStatus::MODIFIED_NOTHING; } - if (!compStressCompile(STRESS_GENERALIZED_PROMOTION, 25)) + if (!compStressCompile(STRESS_PHYSICAL_PROMOTION, 25)) { return PhaseStatus::MODIFIED_NOTHING; } #ifdef DEBUG static ConfigMethodRange s_range; - s_range.EnsureInit(JitConfig.JitEnableGeneralizedPromotionRange()); + s_range.EnsureInit(JitConfig.JitEnablePhysicalPromotionRange()); if (!s_range.Contains(info.compMethodHash())) { @@ -459,7 +459,7 @@ class LocalUses } #ifdef DEBUG - if (comp->compStressCompile(Compiler::STRESS_GENERALIZED_PROMOTION_COST, 25)) + if (comp->compStressCompile(Compiler::STRESS_PHYSICAL_PROMOTION_COST, 25)) { JITDUMP(" Promoting replacement due to stress\n"); return true; diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index 7a777e133fb75..f745c2d128585 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -215,8 +215,8 @@ - - + + From 51586166c7d0c68843a0d7d6534c0c7ee1e07f16 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 4 Apr 2023 16:26:25 +0200 Subject: [PATCH 46/52] Fix after merge --- src/coreclr/jit/promotion.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index ede3fa26995a2..77470750b0541 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -560,7 +560,7 @@ class LocalsUseVisitor : public GenTreeVisitor { GenTree* tree = *use; - if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_ADDR)) { GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); LclVarDsc* dsc = m_compiler->lvaGetDesc(lcl); @@ -570,7 +570,7 @@ class LocalsUseVisitor : public GenTreeVisitor ClassLayout* accessLayout; AccessKindFlags accessFlags; - if (lcl->OperIsLocalAddr()) + if (lcl->OperIs(GT_LCL_ADDR)) { assert(user->OperIs(GT_CALL) && dsc->IsHiddenBufferStructArg() && (user->AsCall()->gtArgs.GetRetBufferArg()->GetNode() == lcl)); @@ -837,7 +837,7 @@ class ReplaceVisitor : public GenTreeVisitor if (call->IsOptimizingRetBufAsLocal()) { assert(retBufArg != nullptr); - assert(retBufArg->GetNode()->OperIsLocalAddr()); + assert(retBufArg->GetNode()->OperIs(GT_LCL_ADDR)); GenTreeLclVarCommon* retBufLcl = retBufArg->GetNode()->AsLclVarCommon(); unsigned size = m_compiler->typGetObjLayout(call->gtRetClsHnd)->GetSize(); From 96bad09a433c9153673a7c66b73a336139a7c0e7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Apr 2023 12:21:03 +0200 Subject: [PATCH 47/52] Revert unnecessary change --- src/coreclr/jit/morphblock.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index df219bd413126..11aded9147934 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -790,7 +790,7 @@ void MorphCopyBlockHelper::TrySpecialCases() m_transformationDecision = BlockTransformation::SkipMultiRegSrc; m_result = m_asg; } - else if (m_src->gtEffectiveVal()->IsCall() && m_dst->OperIs(GT_LCL_VAR) && + else if (m_src->IsCall() && m_dst->OperIs(GT_LCL_VAR) && m_dstVarDsc->CanBeReplacedWithItsField(m_comp)) { JITDUMP("Not morphing a single reg call return\n"); From 3172f375f0404f5195ef32bbf8f048fe371e9fab Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Apr 2023 12:39:49 +0200 Subject: [PATCH 48/52] Include headers instead of forward declarations --- src/coreclr/jit/promotion.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index d898c2de26a72..2ae42e3312c0f 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -4,7 +4,9 @@ #ifndef _PROMOTION_H #define _PROMOTION_H -class Compiler; +#include "compiler.h" +#include "vector.h" + struct Replacement; class Promotion From 0b335ff3ce117a016f00ba61d3df57693655fa9e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Apr 2023 12:40:04 +0200 Subject: [PATCH 49/52] Remove some currently unused accounting --- src/coreclr/jit/promotion.cpp | 40 ++++------------------------------- 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 77470750b0541..5a7e783bd6697 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -53,8 +53,6 @@ struct Access // Number of times this access is on the LHS of an assignment. unsigned CountAssignmentDestination = 0; unsigned CountCallArgs = 0; - unsigned CountCallArgsByImplicitRef = 0; - unsigned CountCallArgsOnStack = 0; unsigned CountReturns = 0; unsigned CountPassedAsRetbuf = 0; @@ -62,8 +60,6 @@ struct Access weight_t CountAssignmentSourceWtd = 0; weight_t CountAssignmentDestinationWtd = 0; weight_t CountCallArgsWtd = 0; - weight_t CountCallArgsByImplicitRefWtd = 0; - weight_t CountCallArgsOnStackWtd = 0; weight_t CountReturnsWtd = 0; weight_t CountPassedAsRetbufWtd = 0; @@ -178,10 +174,8 @@ enum class AccessKindFlags : uint32_t IsCallArg = 1, IsAssignmentSource = 2, IsAssignmentDestination = 4, - IsCallArgByImplicitRef = 8, - IsCallArgOnStack = 16, - IsCallRetBuf = 32, - IsReturned = 64, + IsCallRetBuf = 8, + IsReturned = 16, }; inline constexpr AccessKindFlags operator~(AccessKindFlags a) @@ -283,18 +277,6 @@ class LocalUses { access->CountCallArgs++; access->CountCallArgsWtd += weight; - - if ((flags & AccessKindFlags::IsCallArgByImplicitRef) != AccessKindFlags::None) - { - access->CountCallArgsByImplicitRef++; - access->CountCallArgsByImplicitRefWtd += weight; - } - - if ((flags & AccessKindFlags::IsCallArgOnStack) != AccessKindFlags::None) - { - access->CountCallArgsOnStack++; - access->CountCallArgsOnStackWtd += weight; - } } if ((flags & AccessKindFlags::IsCallRetBuf) != AccessKindFlags::None) @@ -408,7 +390,8 @@ class LocalUses } // TODO-CQ: Tune the following heuristics. Currently they are based on - // x64 code size although using BB weights when available. + // x64 code size although using BB weights when available. The mixing + // does not really make sense. weight_t costWithout = 0; // A normal access without promotion looks like: @@ -497,10 +480,6 @@ class LocalUses access.CountAssignmentDestinationWtd); printf(" # as call arg: (%u, " FMT_WT ")\n", access.CountCallArgs, access.CountCallArgsWtd); - printf(" # as implicit by-ref call arg: (%u, " FMT_WT ")\n", access.CountCallArgsByImplicitRef, - access.CountCallArgsByImplicitRefWtd); - printf(" # as on-stack call arg: (%u, " FMT_WT ")\n", access.CountCallArgsOnStack, - access.CountCallArgsOnStackWtd); printf(" # as retbuf: (%u, " FMT_WT ")\n", access.CountPassedAsRetbuf, access.CountPassedAsRetbufWtd); printf(" # as returned value: (%u, " FMT_WT ")\n\n", access.CountReturns, @@ -655,17 +634,6 @@ class LocalsUseVisitor : public GenTreeVisitor argSize = m_compiler->typGetObjLayout(arg.GetSignatureClassHandle())->GetSize(); } -#ifdef WINDOWS_AMD64_ABI - if ((argSize != 1) && (argSize != 2) && (argSize != 4) && (argSize != 8)) - { - flags |= AccessKindFlags::IsCallArgByImplicitRef; - } - - if (argIndex >= 4) - { - flags |= AccessKindFlags::IsCallArgOnStack; - } -#endif break; } } From cbb7d844a350b4765bea411d3954b3619eddf2db Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Apr 2023 13:11:20 +0200 Subject: [PATCH 50/52] Clean up, add some comments --- src/coreclr/jit/promotion.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 5a7e783bd6697..e23bae7133624 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -390,8 +390,8 @@ class LocalUses } // TODO-CQ: Tune the following heuristics. Currently they are based on - // x64 code size although using BB weights when available. The mixing - // does not really make sense. + // x64 code size although using BB weights when available. This mixing + // does not make sense. weight_t costWithout = 0; // A normal access without promotion looks like: @@ -822,7 +822,7 @@ class ReplaceVisitor : public GenTreeVisitor // user - The user of the local. // // Notes: - // This usually amounts to making replacing like + // This usually amounts to making a replacement like // // LCL_FLD int V00 [+8] -> LCL_VAR int V10. // @@ -853,7 +853,7 @@ class ReplaceVisitor : public GenTreeVisitor } else { - ClassLayout* accessLayout = varTypeIsStruct(accessType) ? lcl->GetLayout(m_compiler) : nullptr; + ClassLayout* accessLayout = accessType == TYP_STRUCT ? lcl->GetLayout(m_compiler) : nullptr; unsigned accessSize = accessLayout != nullptr ? accessLayout->GetSize() : genTypeSize(accessType); for (const Replacement& rep : replacements) { @@ -953,7 +953,7 @@ class ReplaceVisitor : public GenTreeVisitor // replacements into a struct local. // // Parameters: - // use - The use, which will be updated with a cascasing comma trees of assignments + // use - The use, which will be updated with a cascading comma trees of assignments // lcl - The struct local // offs - The starting offset into the struct local of the overlapping range to write back to // size - The size of the overlapping range @@ -1000,7 +1000,7 @@ class ReplaceVisitor : public GenTreeVisitor //------------------------------------------------------------------------ // MarkForReadBack: - // Mark that replacements in the specified struct locals need to be read + // Mark that replacements in the specified struct local need to be read // back before their next use. // // Parameters: @@ -1124,6 +1124,7 @@ PhaseStatus Promotion::Run() return PhaseStatus::MODIFIED_NOTHING; } + // Make all replacements we decided on. ReplaceVisitor replacer(this, replacements); for (BasicBlock* bb : m_compiler->Blocks()) { @@ -1169,6 +1170,8 @@ PhaseStatus Promotion::Run() } } + // Insert initial IR to read arguments/OSR locals into replacement locals, + // and add necessary explicit zeroing. Statement* prevStmt = nullptr; for (unsigned lclNum = 0; lclNum < numLocals; lclNum++) { @@ -1239,7 +1242,7 @@ void Promotion::ExplicitlyZeroInitReplacementLocals(unsigned if (!m_compiler->fgVarNeedsExplicitZeroInit(rep.LclNum, false, false)) { - // Other downstream code (e.g. recursive tailcalls-to-loops) may + // Other downstream code (e.g. recursive-tailcalls-to-loops opt) may // still need to insert further explicit zero initing. m_compiler->lvaGetDesc(rep.LclNum)->lvSuppressedZeroInit = true; continue; @@ -1256,6 +1259,7 @@ void Promotion::ExplicitlyZeroInitReplacementLocals(unsigned // Promotion::InsertInitStatement: // Insert a new statement after the specified statement in the scratch block, // or at the beginning of the scratch block if no other statements were +// inserted yet. // // Parameters: // prevStmt - [in, out] Previous statement to insert after From 0809197ec8985d209757d63dfa01dba1fa4e6490 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Apr 2023 13:29:47 +0200 Subject: [PATCH 51/52] Avoid diffs --- src/coreclr/jit/importercalls.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index f16815329e205..b4d2a0d717700 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -2881,16 +2881,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // We need to use both index and ptr-to-span twice, so clone or spill. index = impCloneExpr(index, &indexClone, NO_CLASS_HANDLE, CHECK_SPILL_ALL, nullptr DEBUGARG("Span.get_Item index")); - - if (impIsAddressInLocal(ptrToSpan)) - { - ptrToSpanClone = gtCloneExpr(ptrToSpan); - } - else - { - ptrToSpan = impCloneExpr(ptrToSpan, &ptrToSpanClone, NO_CLASS_HANDLE, CHECK_SPILL_ALL, - nullptr DEBUGARG("Span.get_Item ptrToSpan")); - } + ptrToSpan = impCloneExpr(ptrToSpan, &ptrToSpanClone, NO_CLASS_HANDLE, CHECK_SPILL_ALL, + nullptr DEBUGARG("Span.get_Item ptrToSpan")); // Bounds check CORINFO_FIELD_HANDLE lengthHnd = info.compCompHnd->getFieldInClass(clsHnd, 1); From 8338272afa8780573d58198429fde66e64b30a6c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 5 Apr 2023 13:30:04 +0200 Subject: [PATCH 52/52] Run jit-format --- src/coreclr/jit/morphblock.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/morphblock.cpp b/src/coreclr/jit/morphblock.cpp index 11aded9147934..6f60165a1295e 100644 --- a/src/coreclr/jit/morphblock.cpp +++ b/src/coreclr/jit/morphblock.cpp @@ -790,8 +790,7 @@ void MorphCopyBlockHelper::TrySpecialCases() m_transformationDecision = BlockTransformation::SkipMultiRegSrc; m_result = m_asg; } - else if (m_src->IsCall() && m_dst->OperIs(GT_LCL_VAR) && - m_dstVarDsc->CanBeReplacedWithItsField(m_comp)) + else if (m_src->IsCall() && m_dst->OperIs(GT_LCL_VAR) && m_dstVarDsc->CanBeReplacedWithItsField(m_comp)) { JITDUMP("Not morphing a single reg call return\n"); m_transformationDecision = BlockTransformation::SkipSingleRegCallSrc;