From b034e48c784b8a8d1edf25f3e8923de5c4b52041 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 12 Jun 2023 17:05:20 +0200 Subject: [PATCH] JIT: Add support for induced field accesses in physical promotion Add support for promoting fields based on the fact that assignment decomposition induces new field accesses. To do so we store all struct assignments that involve candidates for physical promotion as part of the initial accounting pass. Then when picking the replacements we do it as a fixpoint computation, iteratively expanding the set of replacements based on field accesses induced by decomposition assignment with the existing set of replacements. Fix #87371 --- src/coreclr/jit/promotion.cpp | 558 +++++++++++++++++++++++++++++++--- src/coreclr/jit/promotion.h | 12 +- 2 files changed, 522 insertions(+), 48 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 68bf759d0375f..415f178266e50 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -202,13 +202,28 @@ bool AggregateInfo::OverlappingReplacements(unsigned offset, return true; } +struct PrimitiveAccess +{ + weight_t CountWtd = 0; + unsigned Offset; + var_types AccessType; + + INDEBUG(unsigned Count = 0); + + PrimitiveAccess(unsigned offset, var_types accessType) : Offset(offset), AccessType(accessType) + { + } +}; + // Tracks all the accesses into one particular struct local. 
class LocalUses { - jitstd::vector m_accesses; + jitstd::vector m_accesses; + jitstd::vector m_inducedAccesses; public: - LocalUses(Compiler* comp) : m_accesses(comp->getAllocator(CMK_Promotion)) + LocalUses(Compiler* comp) + : m_accesses(comp->getAllocator(CMK_Promotion)), m_inducedAccesses(comp->getAllocator(CMK_Promotion)) { } @@ -307,6 +322,56 @@ class LocalUses #endif } + //------------------------------------------------------------------------ + // RecordInducedAccess: + // Record an induced access into this local with the specified offset and access type. + // + // Parameters: + // offs - The offset being accessed + // accessType - The type of the access + // weight - Weight of the block containing the access + // + // Remarks: + // Induced accesses are accesses that are induced by physical promotion + // due to assignment decomposition. They are always of primitive type. + // + void RecordInducedAccess(unsigned offs, var_types accessType, weight_t weight) + { + PrimitiveAccess* access = nullptr; + + size_t index = 0; + if (m_inducedAccesses.size() > 0) + { + index = Promotion::BinarySearch(m_inducedAccesses, offs); + if ((ssize_t)index >= 0) + { + do + { + PrimitiveAccess& candidateAccess = m_inducedAccesses[index]; + if (candidateAccess.AccessType == accessType) + { + access = &candidateAccess; + break; + } + + index++; + } while (index < m_inducedAccesses.size() && m_inducedAccesses[index].Offset == offs); + } + else + { + index = ~index; + } + } + + if (access == nullptr) + { + access = &*m_inducedAccesses.insert(m_inducedAccesses.begin() + index, PrimitiveAccess(offs, accessType)); + } + + access->CountWtd += weight; + INDEBUG(access->Count++); + } + //------------------------------------------------------------------------ // PickPromotions: // Pick specific replacements to make for this struct local after a set @@ -336,33 +401,120 @@ class LocalUses continue; } - if (!EvaluateReplacement(comp, lclNum, access)) + if (!EvaluateReplacement(comp, lclNum, 
access, 0)) { continue; } -#ifdef DEBUG - char buf[32]; - sprintf_s(buf, sizeof(buf), "V%02u.[%03u..%03u)", lclNum, access.Offset, - access.Offset + genTypeSize(access.AccessType)); - size_t len = strlen(buf) + 1; - char* bufp = new (comp, CMK_DebugOnly) char[len]; - strcpy_s(bufp, len, buf); -#endif - unsigned newLcl = comp->lvaGrabTemp(false DEBUGARG(bufp)); - LclVarDsc* dsc = comp->lvaGetDesc(newLcl); - dsc->lvType = access.AccessType; + if (*aggregateInfo == nullptr) + { + *aggregateInfo = new (comp, CMK_Promotion) AggregateInfo(comp->getAllocator(CMK_Promotion), lclNum); + } + + (*aggregateInfo)->Replacements.push_back(Replacement(access.Offset, access.AccessType)); + } + + JITDUMP("\n"); + } + + //------------------------------------------------------------------------ + // PickInducedPromotions: + // Pick additional promotions to make based on the fact that some + // accesses will be induced by assignment decomposition. + // + // Parameters: + // comp - Compiler instance + // lclNum - Local num for this struct local + // aggregateInfo - [out] Pointer to aggregate info to create and insert replacements into. 
+ // + bool PickInducedPromotions(Compiler* comp, unsigned lclNum, AggregateInfo** aggregateInfo) + { + if (m_inducedAccesses.size() <= 0) + { + return false; + } + + bool any = false; + JITDUMP("Picking induced promotions for V%02u\n", lclNum); + for (PrimitiveAccess& inducedAccess : m_inducedAccesses) + { + bool overlapsOtherInducedAccess = false; + for (PrimitiveAccess& otherInducedAccess : m_inducedAccesses) + { + if (&otherInducedAccess == &inducedAccess) + { + continue; + } + + if (inducedAccess.Offset + genTypeSize(inducedAccess.AccessType) <= otherInducedAccess.Offset) + { + break; + } + + if (otherInducedAccess.Offset + genTypeSize(otherInducedAccess.AccessType) <= inducedAccess.Offset) + { + continue; + } + + overlapsOtherInducedAccess = true; + } + + if (overlapsOtherInducedAccess) + { + continue; + } + + Access* access = FindAccess(inducedAccess.Offset, inducedAccess.AccessType); + + if (access == nullptr) + { + Access fakeAccess(inducedAccess.Offset, inducedAccess.AccessType, nullptr); + if (!EvaluateReplacement(comp, lclNum, fakeAccess, inducedAccess.CountWtd)) + { + continue; + } + } + else + { + if (!EvaluateReplacement(comp, lclNum, *access, inducedAccess.CountWtd)) + { + continue; + } + } if (*aggregateInfo == nullptr) { *aggregateInfo = new (comp, CMK_Promotion) AggregateInfo(comp->getAllocator(CMK_Promotion), lclNum); } + size_t insertionIndex; + if ((*aggregateInfo)->Replacements.size() > 0) + { +#ifdef DEBUG + Replacement* overlapRep; + assert(!(*aggregateInfo) + ->OverlappingReplacements(inducedAccess.Offset, genTypeSize(inducedAccess.AccessType), + &overlapRep, nullptr)); +#endif + + insertionIndex = + Promotion::BinarySearch((*aggregateInfo)->Replacements, + inducedAccess.Offset); + assert((ssize_t)insertionIndex < 0); + insertionIndex = ~insertionIndex; + } + else + { + insertionIndex = 0; + } + (*aggregateInfo) - ->Replacements.push_back(Replacement(access.Offset, access.AccessType, newLcl DEBUGARG(bufp))); + 
->Replacements.insert((*aggregateInfo)->Replacements.begin() + insertionIndex, + Replacement(inducedAccess.Offset, inducedAccess.AccessType)); + any = true; } - JITDUMP("\n"); + return any; } //------------------------------------------------------------------------ @@ -370,14 +522,15 @@ class LocalUses // Evaluate legality and profitability of a single replacement candidate. // // Parameters: - // comp - Compiler instance - // lclNum - Local num for this struct local - // access - Access information for the candidate. + // comp - Compiler instance + // lclNum - Local num for this struct local + // access - Access information for the candidate. + // inducedCountWtd - Additional weighted count due to induced accesses. // // Returns: // True if we should promote this access and create a replacement; otherwise false. // - bool EvaluateReplacement(Compiler* comp, unsigned lclNum, const Access& access) + bool EvaluateReplacement(Compiler* comp, unsigned lclNum, const Access& access, weight_t inducedCountWtd) { weight_t countOverlappedCallArgWtd = 0; weight_t countOverlappedRetbufsWtd = 0; @@ -387,7 +540,9 @@ class LocalUses for (const Access& otherAccess : m_accesses) { if (&otherAccess == &access) + { continue; + } if (!otherAccess.Overlaps(access.Offset, genTypeSize(access.AccessType))) { @@ -407,14 +562,14 @@ class LocalUses weight_t costWithout = 0; // We cost any normal access (which is a struct load or store) without promotion at 3 cycles. - costWithout += access.CountWtd * 3; + costWithout += (access.CountWtd + inducedCountWtd) * 3; weight_t costWith = 0; // For promoted accesses we expect these to turn into reg-reg movs (and in many cases be fully contained in the // parent). // We cost these at 0.5 cycles. - costWith += access.CountWtd * 0.5; + costWith += (access.CountWtd + inducedCountWtd) * 0.5; // Now look at the overlapping struct uses that promotion will make more expensive. 
@@ -501,8 +656,24 @@ class LocalUses return false; } + //------------------------------------------------------------------------ + // ClearInducedAccesses: + // Clear the stored induced access metrics. + // + void ClearInducedAccesses() + { + m_inducedAccesses.clear(); + } + #ifdef DEBUG - void Dump(unsigned lclNum) + //------------------------------------------------------------------------ + // DumpAccesses: + // Dump the stored access metrics for a specified local. + // + // Parameters: + // lclNum - The local + // + void DumpAccesses(unsigned lclNum) { if (m_accesses.size() <= 0) { @@ -535,13 +706,76 @@ class LocalUses access.CountReturnsWtd); } } + + //------------------------------------------------------------------------ + // DumpInducedAccesses: + // Dump induced accesses for a specified struct local. + // + // Parameters: + // lclNum - The local + // + void DumpInducedAccesses(unsigned lclNum) + { + if (m_inducedAccesses.size() <= 0) + { + return; + } + + printf("Induced accesses for V%02u\n", lclNum); + for (PrimitiveAccess& access : m_inducedAccesses) + { + printf(" %s @ %03u\n", varTypeName(access.AccessType), access.Offset); + printf(" #: (%u, " FMT_WT ")\n", access.Count, access.CountWtd); + } + } #endif + +private: + //------------------------------------------------------------------------ + // FindAccess: + // Find access metrics information for the specified offset and access type. + // + // Parameters: + // offs - The offset + // accessType - Access type + // + // Returns: + // Pointer to a matching access, or nullptr if no match was found. 
+ // + Access* FindAccess(unsigned offs, var_types accessType) + { + if (m_accesses.size() <= 0) + { + return nullptr; + } + + size_t index = Promotion::BinarySearch(m_accesses, offs); + if ((ssize_t)index < 0) + { + return nullptr; + } + + do + { + Access& candidateAccess = m_accesses[index]; + if (candidateAccess.AccessType == accessType) + { + return &candidateAccess; + } + + index++; + } while ((index < m_accesses.size()) && (m_accesses[index].Offset == offs)); + + return nullptr; + } }; +// Struct used to save all struct stores involving physical promotion candidates. +// These stores can induce new field accesses as part of assignment decomposition. struct CandidateStore { - GenTree* Store; - BasicBlock* Block; + GenTreeLclVarCommon* Store; + BasicBlock* Block; }; // Visitor that records information about uses of struct locals. @@ -578,6 +812,17 @@ class LocalsUseVisitor : public GenTreeVisitor m_curBB = bb; } + //------------------------------------------------------------------------ + // PreOrderVisit: + // Visit a node in preorder and add its use information to the metrics. + // + // Parameters: + // use - The use edge + // user - The user + // + // Returns: + // Visitor result + // fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) { GenTree* tree = *use; @@ -613,7 +858,7 @@ class LocalsUseVisitor : public GenTreeVisitor // Make sure we add it only once if both the destination and source are candidates. if ((m_candidateStores.Height() <= 0) || (m_candidateStores.Top().Store != user)) { - m_candidateStores.Push(CandidateStore{user, m_curBB}); + m_candidateStores.Push(CandidateStore{user->AsLclVarCommon(), m_curBB}); } } } @@ -627,6 +872,18 @@ class LocalsUseVisitor : public GenTreeVisitor return fgWalkResult::WALK_CONTINUE; } + //------------------------------------------------------------------------ + // PickPromotions: + // Pick promotions and create aggregate information for each promoted + // struct with promotions. 
+ // + // Parameters: + // aggregates - Appropriately sized vector to create aggregate information in. + // + // Returns: + // True if any struct was physically promoted with at least one replacement; + // otherwise false. + // bool PickPromotions(jitstd::vector& aggregates) { unsigned numLocals = (unsigned)aggregates.size(); @@ -642,26 +899,138 @@ class LocalsUseVisitor : public GenTreeVisitor continue; } +#ifdef DEBUG if (m_compiler->verbose) { - uses->Dump(lclNum); + uses->DumpAccesses(lclNum); } +#endif uses->PickPromotions(m_compiler, lclNum, &aggregates[lclNum]); - if (aggregates[lclNum] == nullptr) + any |= aggregates[lclNum] != nullptr; + } + + if (!any) + { + return false; + } + + int iters = 0; + if (m_candidateStores.Height() > 0) + { + // Now look for induced accesses due to assignment decomposition. + + JITDUMP("Looking for induced accesses with %d stores between candidates\n", m_candidateStores.Height()); + // Expand the set of fields iteratively based on the current picked + // set. We put a limit on this fixpoint computation to avoid + // pathological cases. From measurements no methods in our own + // collections need more than 10 iterations and 99.5% of methods + // need fewer than 5 iterations. + // (Also note that this part of physical promotion does not seem to have measurable impact on TP.) 
+ while (iters++ < 10) { - continue; + for (int i = 0; i < m_candidateStores.Height(); i++) + { + const CandidateStore& candidateStore = m_candidateStores.BottomRef(i); + GenTreeLclVarCommon* store = candidateStore.Store; + + assert(store->TypeIs(TYP_STRUCT)); + assert(store->Data()->OperIsLocalRead()); + + GenTreeLclVarCommon* src = store->Data()->AsLclVarCommon(); + + LclVarDsc* dstDsc = m_compiler->lvaGetDesc(store); + LclVarDsc* srcDsc = m_compiler->lvaGetDesc(src); + + assert(Promotion::IsCandidateForPhysicalPromotion(dstDsc) || + Promotion::IsCandidateForPhysicalPromotion(srcDsc)); + + if (dstDsc->lvPromoted) + { + InduceAccessesFromRegularlyPromotedStruct(aggregates, src, store, candidateStore.Block); + } + else if (srcDsc->lvPromoted) + { + InduceAccessesFromRegularlyPromotedStruct(aggregates, store, src, candidateStore.Block); + } + else + { + if (Promotion::IsCandidateForPhysicalPromotion(dstDsc)) + { + InduceAccessesInCandidate(aggregates, store, src, candidateStore.Block); + } + + if (Promotion::IsCandidateForPhysicalPromotion(srcDsc)) + { + InduceAccessesInCandidate(aggregates, src, store, candidateStore.Block); + } + } + } + + bool any = false; + for (unsigned lclNum = 0; lclNum < numLocals; lclNum++) + { + LocalUses* uses = m_uses[lclNum]; + if (uses == nullptr) + { + continue; + } +#ifdef DEBUG + if (m_compiler->verbose) + { + uses->DumpInducedAccesses(lclNum); + } +#endif + + any |= uses->PickInducedPromotions(m_compiler, lclNum, &aggregates[lclNum]); + } + + if (!any) + { + break; + } + + for (unsigned lclNum = 0; lclNum < numLocals; lclNum++) + { + if (m_uses[lclNum] != nullptr) + { + m_uses[lclNum]->ClearInducedAccesses(); + } + } } + } - AggregateInfo* agg = aggregates[lclNum]; + for (AggregateInfo* agg : aggregates) + { + if (agg == nullptr) + { + continue; + } - any = true; jitstd::vector& reps = agg->Replacements; assert(reps.size() > 0); + // Create locals + for (Replacement& rep : reps) + { #ifdef DEBUG - JITDUMP("V%02u promoted with %d 
replacements\n", lclNum, (int)reps.size()); + char buf[32]; + sprintf_s(buf, sizeof(buf), "V%02u.[%03u..%03u)", agg->LclNum, rep.Offset, + rep.Offset + genTypeSize(rep.AccessType)); + size_t len = strlen(buf) + 1; + char* bufp = new (m_compiler, CMK_DebugOnly) char[len]; + strcpy_s(bufp, len, buf); + rep.Description = bufp; +#endif + + rep.LclNum = m_compiler->lvaGrabTemp(false DEBUGARG(rep.Description)); + LclVarDsc* dsc = m_compiler->lvaGetDesc(rep.LclNum); + dsc->lvType = rep.AccessType; + } + +#ifdef DEBUG + JITDUMP("V%02u promoted with %d replacements\n", agg->LclNum, (int)reps.size()); for (const Replacement& rep : reps) { JITDUMP(" [%03u..%03u) promoted as %s V%02u\n", rep.Offset, rep.Offset + genTypeSize(rep.AccessType), @@ -669,13 +1038,12 @@ class LocalsUseVisitor : public GenTreeVisitor } #endif - JITDUMP("Computing unpromoted remainder for V%02u\n", lclNum); + JITDUMP("Computing unpromoted remainder for V%02u\n", agg->LclNum); StructSegments unpromotedParts = - Promotion::SignificantSegments(m_compiler, m_compiler->lvaGetDesc(lclNum)->GetLayout()); - for (size_t i = 0; i < reps.size(); i++) + Promotion::SignificantSegments(m_compiler, m_compiler->lvaGetDesc(agg->LclNum)->GetLayout()); + for (Replacement& rep : reps) { - unpromotedParts.Subtract( - StructSegments::Segment(reps[i].Offset, reps[i].Offset + genTypeSize(reps[i].AccessType))); + unpromotedParts.Subtract(StructSegments::Segment(rep.Offset, rep.Offset + genTypeSize(rep.AccessType))); } JITDUMP(" Remainder: "); @@ -693,6 +1061,8 @@ class LocalsUseVisitor : public GenTreeVisitor { // Aggregate is fully promoted, leave UnpromotedMin == UnpromotedMax to indicate this. 
} + + any = true; } return any; @@ -719,6 +1089,108 @@ class LocalsUseVisitor : public GenTreeVisitor return m_uses[lclNum]; } + //------------------------------------------------------------------------ + // InduceAccessesFromRegularlyPromotedStruct: + // Create induced accesses based on the fact that there is an assignment + // between a physical promotion candidate and regularly promoted struct. + // + // Parameters: + // aggregates - Aggregate information with current set of replacements + // for each struct local. + // candidateLcl - The local node for a physical promotion candidate. + // regPromLcl - The local node for the regularly promoted struct that + // may induce new LCL_FLD nodes in the candidate. + // block - The block that the assignment appears in. + // + void InduceAccessesFromRegularlyPromotedStruct(jitstd::vector& aggregates, + GenTreeLclVarCommon* candidateLcl, + GenTreeLclVarCommon* regPromLcl, + BasicBlock* block) + { + unsigned regPromOffs = regPromLcl->GetLclOffs(); + unsigned candidateOffs = candidateLcl->GetLclOffs(); + unsigned size = regPromLcl->GetLayout(m_compiler)->GetSize(); + + LclVarDsc* regPromDsc = m_compiler->lvaGetDesc(regPromLcl); + for (unsigned fieldLcl = regPromDsc->lvFieldLclStart, i = 0; i < regPromDsc->lvFieldCnt; fieldLcl++, i++) + { + LclVarDsc* fieldDsc = m_compiler->lvaGetDesc(fieldLcl); + if ((fieldDsc->lvFldOffset >= regPromOffs) && + (fieldDsc->lvFldOffset + genTypeSize(fieldDsc->lvType) <= (regPromOffs + size))) + { + InduceAccess(aggregates, candidateLcl->GetLclNum(), + candidateLcl->GetLclOffs() + (fieldDsc->lvFldOffset - regPromOffs), fieldDsc->lvType, + block); + } + } + } + + //------------------------------------------------------------------------ + // InduceAccessesInCandidate: + // Create induced accesses based on the fact that a specified candidate + // is being assigned from another struct local (the inducer). 
+ // + // Parameters: + // aggregates - Aggregate information with current set of replacements + // for each struct local. + // candidate - The local node for the physical promotion candidate. + // inducer - The local node that may induce new LCL_FLD nodes in the candidate. + // block - The block that the assignment appears in. + // + void InduceAccessesInCandidate(jitstd::vector& aggregates, + GenTreeLclVarCommon* candidate, + GenTreeLclVarCommon* inducer, + BasicBlock* block) + { + unsigned candOffs = candidate->GetLclOffs(); + unsigned inducerOffs = inducer->GetLclOffs(); + unsigned size = candidate->GetLayout(m_compiler)->GetSize(); + + AggregateInfo* inducerAgg = aggregates[inducer->GetLclNum()]; + if (inducerAgg != nullptr) + { + Replacement* firstRep; + Replacement* endRep; + if (inducerAgg->OverlappingReplacements(inducerOffs, size, &firstRep, &endRep)) + { + for (Replacement* rep = firstRep; rep < endRep; rep++) + { + InduceAccess(aggregates, candidate->GetLclNum(), candOffs + (rep->Offset - inducerOffs), + rep->AccessType, block); + } + } + } + } + + //------------------------------------------------------------------------ + // InduceAccess: + // Record an induced access in a candidate for physical promotion. + // + // Parameters: + // aggregates - Aggregate information with current set of replacements + // for each struct local. + // lclNum - Local that has the induced access. + // offset - Offset at which the induced access starts. + // type - Type of the induced access. + // block - The block with the induced access. 
+ // + void InduceAccess( + jitstd::vector& aggregates, unsigned lclNum, unsigned offset, var_types type, BasicBlock* block) + { + AggregateInfo* agg = aggregates[lclNum]; + if (agg != nullptr) + { + Replacement* overlapRep; + if (agg->OverlappingReplacements(offset, genTypeSize(type), &overlapRep, nullptr)) + { + return; + } + } + + LocalUses* uses = GetOrCreateUses(lclNum); + uses->RecordInducedAccess(offset, type, block->getBBWeight(m_compiler)); + } + //------------------------------------------------------------------------ // ClassifyLocalAccess: // Given a local node and its user, classify information about it. @@ -1197,8 +1669,6 @@ StructSegments Promotion::SignificantSegments(Compiler* compiler, } #endif - // TODO-TP: Cache this per class layout, we call this for every struct - // operation on a promoted local. return segments; } @@ -1221,6 +1691,11 @@ GenTree* Promotion::CreateWriteBack(Compiler* compiler, unsigned structLclNum, c { GenTree* value = compiler->gtNewLclVarNode(replacement.LclNum); GenTree* store = compiler->gtNewStoreLclFldNode(structLclNum, replacement.AccessType, replacement.Offset, value); + + if (!compiler->lvaGetDesc(structLclNum)->lvDoNotEnregister) + { + compiler->lvaSetVarDoNotEnregister(structLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + } return store; } @@ -1243,6 +1718,11 @@ GenTree* Promotion::CreateReadBack(Compiler* compiler, unsigned structLclNum, co { GenTree* value = compiler->gtNewLclFldNode(structLclNum, replacement.AccessType, replacement.Offset); GenTree* store = compiler->gtNewStoreLclVarNode(replacement.LclNum, value); + + if (!compiler->lvaGetDesc(structLclNum)->lvDoNotEnregister) + { + compiler->lvaSetVarDoNotEnregister(structLclNum DEBUGARG(DoNotEnregisterReason::LocalField)); + } return store; } diff --git a/src/coreclr/jit/promotion.h b/src/coreclr/jit/promotion.h index c0b65bef76a3e..46394e22481cd 100644 --- a/src/coreclr/jit/promotion.h +++ b/src/coreclr/jit/promotion.h @@ -12,7 +12,7 @@ struct 
Replacement { unsigned Offset; var_types AccessType; - unsigned LclNum; + unsigned LclNum = BAD_VAR_NUM; // Is the replacement local (given by LclNum) fresher than the value in the struct local? bool NeedsWriteBack = true; // Is the value in the struct local fresher than the replacement local? @@ -21,16 +21,10 @@ struct Replacement // back before transferring control if necessary. bool NeedsReadBack = false; #ifdef DEBUG - const char* Description; + const char* Description = ""; #endif - Replacement(unsigned offset, var_types accessType, unsigned lclNum DEBUGARG(const char* description)) - : Offset(offset) - , AccessType(accessType) - , LclNum(lclNum) -#ifdef DEBUG - , Description(description) -#endif + Replacement(unsigned offset, var_types accessType) : Offset(offset), AccessType(accessType) { }