From a2ecd569d4d5e1f98694084af914459581ecb851 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 28 Feb 2023 08:37:57 -0800 Subject: [PATCH 1/3] JIT: initial implementation of profile synthesis Implements a profile synthesis algorithm based on the classic Wu-Larus paper (Static branch frequency and program profile analysis, Micro-27, 1994), with a simple set of heuristics. First step is construction of a depth-first spanning tree (DFST) for the flowgraph, and corresponding reverse postorder (RPO). Together these drive loop recognition; currently we only recognize reducible loops. We use DFST (non-) ancestry as a proxy for (non-) domination: the dominator of a node is required to be a DFST ancestor. So no explicit dominance computation is needed. Irreducible loops are noted but ignored. Loop features like entry, back, and exit edges, body sets, and nesting are computed and saved. Next step is assignment of edge likelihoods. Here we use some simple local heuristics based on loop structure, returns, and throws. A final heuristic gives slight preference to conditional branches that fall through to the next IL offset. After that we use loop nesting to compute the "cyclic probability" $cp$ for each loop, working inner to outer in loops and RPO within loops. $cp$ summarizes the effect of flow through the loop and around loop back edges. We cap $cp$ at no more than 1000. When finding $cp$ for outer loops we use $cp$ for inner loops. Once all $cp$ values are known, we assign "external" input weights to method and EH entry points, and then a final RPO walk computes the expected weight of each block (and, via edge likelihoods, each edge). We use the existing DFS code to build the DFST and the RPO, augmented by some fixes to ensure all blocks (even ones in isolated cycles) get numbered. This initial version is intended to establish the right functionality, enable wider correctness testing, and to provide a foundation for refinement of the heuristics. 
It is not yet as efficient as it could be. The loop recognition and recording done here overlaps with similar code elsewhere in the JIT. The version here is structural and not sensitive to IL patterns, so is arguably more general and I believe a good deal simpler than the lexically driven recognition we use for the current loop optimizer. I aspire to reconcile this somehow in future work. All this is disabled by default; a new config option either enables using synthesis to set block weights for all root methods or just those without PGO data. Synthesis for inlinees is not yet enabled; progress here is blocked by #82755. --- src/coreclr/jit/CMakeLists.txt | 1 + src/coreclr/jit/block.cpp | 4 +- src/coreclr/jit/compiler.h | 2 + src/coreclr/jit/fgopt.cpp | 14 + src/coreclr/jit/fgprofile.cpp | 17 + src/coreclr/jit/fgprofilesynthesis.cpp | 917 +++++++++++++++++++++++++ src/coreclr/jit/fgprofilesynthesis.h | 102 +++ src/coreclr/jit/jitconfigvalues.h | 5 +- 8 files changed, 1059 insertions(+), 3 deletions(-) create mode 100644 src/coreclr/jit/fgprofilesynthesis.cpp create mode 100644 src/coreclr/jit/fgprofilesynthesis.h diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 50e4706c329f1..b50590b5b3bd9 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -113,6 +113,7 @@ set( JIT_SOURCES fginline.cpp fgopt.cpp fgprofile.cpp + fgprofilesynthesis.cpp fgstmt.cpp flowgraph.cpp forwardsub.cpp diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index d81a8ae34bd9e..20f48f5d096a4 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -1431,15 +1431,15 @@ BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind) /* Give the block a number, set the ancestor count and weight */ ++fgBBcount; - ++fgBBNumMax; if (compIsForInlining()) { block->bbNum = ++impInlineInfo->InlinerCompiler->fgBBNumMax; + fgBBNumMax = block->bbNum; } else { - block->bbNum = fgBBNumMax; + block->bbNum = ++fgBBNumMax; 
} if (compRationalIRForm) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a846e040ec33c..63f95736449ff 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -86,6 +86,7 @@ class CSE_DataFlow; // defined in OptCSE.cpp class OptBoolsDsc; // defined in optimizer.cpp struct RelopImplicationInfo; // defined in redundantbranchopts.cpp struct JumpThreadInfo; // defined in redundantbranchopts.cpp +class ProfileSynthesis; // defined in profilesynthesis.h #ifdef DEBUG struct IndentStack; #endif @@ -1993,6 +1994,7 @@ class Compiler friend class SharedTempsScope; friend class CallArgs; friend class IndirectCallTransformer; + friend class ProfileSynthesis; #ifdef FEATURE_HW_INTRINSICS friend struct HWIntrinsicInfo; diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 407265d682472..1b450dff82fed 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -789,6 +789,20 @@ void Compiler::fgDfsReversePostorder() } } + // If there are still unvisited blocks (say isolated cycles), visit them too. + // + if (preorderIndex != fgBBcount + 1) + { + JITDUMP("DFS: flow graph has some isolated cycles, doing extra traversals\n"); + for (BasicBlock* const block : Blocks()) + { + if (!BlockSetOps::IsMember(this, visited, block->bbNum)) + { + fgDfsReversePostorderHelper(block, visited, preorderIndex, postorderIndex); + } + } + } + // After the DFS reverse postorder is completed, we must have visited all the basic blocks. 
noway_assert(preorderIndex == fgBBcount + 1); noway_assert(postorderIndex == fgBBcount + 1); diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 8e86bc9737302..180e2b11d2b8e 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -7,6 +7,8 @@ #pragma hdrstop #endif +#include "fgprofilesynthesis.h" + // Flowgraph Profile Support //------------------------------------------------------------------------ @@ -2414,6 +2416,21 @@ PhaseStatus Compiler::fgIncorporateProfileData() return PhaseStatus::MODIFIED_EVERYTHING; } +#ifdef DEBUG + // Optionally just run synthesis + // + if ((JitConfig.JitSynthesizeCounts() > 0) && !compIsForInlining()) + { + if ((JitConfig.JitSynthesizeCounts() == 1) || ((JitConfig.JitSynthesizeCounts() == 2) && !fgHaveProfileData())) + { + JITDUMP("Synthesizing profile data\n"); + ProfileSynthesis::Run(this, ProfileSynthesisOption::AssignLikelihoods); + fgPgoHaveWeights = false; + return PhaseStatus::MODIFIED_EVERYTHING; + } + } +#endif + // Do we have profile data? // if (!fgHaveProfileData()) diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp new file mode 100644 index 0000000000000..77af162739749 --- /dev/null +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -0,0 +1,917 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "jitpch.h" + +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#include "fgprofilesynthesis.h" + +// TODO +// +// * faster way of doing fgGetPredForBlock +// * vet against some real data +// * IR based heuristics (perhaps) +// * PGO blend modes, random, etc +// * Repair mode +// * Inlinee flow graphs +// * During Cp, avoid repeatedly propagating through nested loops +// * Fake BB0 or always force scratch BB +// * Reconcile with our other loop finding stuff +// * Stop the upweight/downweight of loops in rest of jit +// * Durable edge properties (exit, back) +// * Proper weight comp for finallies +// * Tweak RunRarely to be at or near zero +// * OSR entry weight +// * Special handling for deep nests? +// * Plan for irreducible cases -- MoveNext's + +//------------------------------------------------------------------------ +// fgProfileSynthesis: update edge likelihoods and block counts based +// on various strategies +// +// Arguments: +// options - options to control synthesis +// +void ProfileSynthesis::Run(ProfileSynthesisOption option) +{ + // Just root instances for now. + // Need to sort out issues with flow analysis for inlinees. 
+ // + if (m_comp->compIsForInlining()) + { + return; + } + + BuildReversePostorder(); + FindLoops(); + + // Retain or compute edge likelihood information + // + switch (option) + { + case ProfileSynthesisOption::AssignLikelihoods: + AssignLikelihoods(); + break; + + case ProfileSynthesisOption::RetainLikelihoods: + break; + + case ProfileSynthesisOption::BlendLikelihoods: + BlendLikelihoods(); + break; + + case ProfileSynthesisOption::ResetAndSynthesize: + ClearLikelihoods(); + AssignLikelihoods(); + break; + + case ProfileSynthesisOption::ReverseLikelihoods: + ReverseLikelihoods(); + break; + + case ProfileSynthesisOption::RandomLikelihoods: + RandomizeLikelihoods(); + break; + + default: + assert(!"unexpected profile synthesis option"); + break; + } + + // Determine cyclic probabilities + // + ComputeCyclicProbabilities(); + + // Assign weights to entry points in the flow graph + // + AssignInputWeights(); + + // Compute the block weights given the inputs and edge likelihoods + // + ComputeBlockWeights(); +} + +//------------------------------------------------------------------------ +// AssignLikelihoods: update edge likelihoods and block counts based +// entirely on heuristics. +// +// Notes: +// any existing likelihoods are removed in favor of heuristic +// likelihoods +// +void ProfileSynthesis::AssignLikelihoods() +{ + JITDUMP("Assigning edge likelihoods based on heuristics\n"); + + for (BasicBlock* const block : m_comp->Blocks()) + { + switch (block->bbJumpKind) + { + case BBJ_THROW: + case BBJ_RETURN: + case BBJ_EHFINALLYRET: + // No successor cases + // (todo: finally ret may have succs) + break; + + case BBJ_NONE: + case BBJ_CALLFINALLY: + // Single successor next cases + // + // Note we handle flow to the finally + // specially; this represents return + // from the finally. 
+ AssignLikelihoodNext(block); + break; + + case BBJ_ALWAYS: + case BBJ_LEAVE: + case BBJ_EHCATCHRET: + case BBJ_EHFILTERRET: + // Single successor jump cases + AssignLikelihoodJump(block); + break; + + case BBJ_COND: + // Two successor cases + AssignLikelihoodCond(block); + break; + + case BBJ_SWITCH: + // N successor cases + AssignLikelihoodSwitch(block); + break; + + default: + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// IsDfsAncestor: see if block `x` is ancestor of block `y` in the depth +// first spanning tree +// +// Arguments: +// x -- block that is possible ancestor +// y -- block that is possible descendant +// +// Returns: +// True if x is ancestor of y in the depth first spanning tree. +// +// Notes: +// If return value is false, then x does not dominate y. +// +bool ProfileSynthesis::IsDfsAncestor(BasicBlock* x, BasicBlock* y) +{ + return ((x->bbPreorderNum <= y->bbPreorderNum) && (y->bbPostorderNum <= x->bbPostorderNum)); +} + +//------------------------------------------------------------------------ +// IsLoopHeader: see if a block is a loop header, and if so return +// the associated loop. +// +// Arguments: +// block - block in question +// +// Returns: +// loop headed by block, or nullptr +// +SimpleLoop* ProfileSynthesis::IsLoopHeader(BasicBlock* block) +{ + for (SimpleLoop* loop : *m_loops) + { + if (loop->m_head == block) + { + return loop; + } + } + + return nullptr; +} + +//------------------------------------------------------------------------ +// IsLoopBackEdge: see if an edge is a loop back edge +// +// Arguments: +// edge - edge in question +// +// Returns: +// True if edge is a backedge in some recognized loop. +// +// Notes: +// Different than asking IsDfsAncestor since we disqualify some +// natural backedges for complex loop structures. 
+// +// Todo: +// Annotate the edge directly +// +bool ProfileSynthesis::IsLoopBackEdge(FlowEdge* edge) +{ + for (SimpleLoop* loop : *m_loops) + { + for (FlowEdge* loopBackEdge : loop->m_backEdges) + { + if (loopBackEdge == edge) + { + return true; + } + } + } + + return false; +} + +//------------------------------------------------------------------------ +// IsLoopExitEdge: see if a flow edge is a loop exit edge +// +// Arguments: +// edge - edge in question +// +// Returns: +// True if edge is an exit edge in some recognized loop +// +// Todo: +// Annotate the edge directly +// +// Decide if we want to report that the edge exits +// multiple loops. + +bool ProfileSynthesis::IsLoopExitEdge(FlowEdge* edge) +{ + for (SimpleLoop* loop : *m_loops) + { + for (FlowEdge* loopExitEdge : loop->m_exitEdges) + { + if (loopExitEdge == edge) + { + return true; + } + } + } + + return false; +} + +//------------------------------------------------------------------------ +// AssignLikelihoodNext: update edge likelihood for block that always +// transfers control to bbNext +// +// Arguments; +// block -- block in question +// +void ProfileSynthesis::AssignLikelihoodNext(BasicBlock* block) +{ + FlowEdge* const edge = m_comp->fgGetPredForBlock(block->bbNext, block); + edge->setLikelihood(1.0); +} + +//------------------------------------------------------------------------ +// AssignLikelihoodJump: update edge likelihood for a block that always +// transfers control to bbJumpDest +// +// Arguments; +// block -- block in question +// +void ProfileSynthesis::AssignLikelihoodJump(BasicBlock* block) +{ + FlowEdge* const edge = m_comp->fgGetPredForBlock(block->bbJumpDest, block); + edge->setLikelihood(1.0); +} + +//------------------------------------------------------------------------ +// AssignLikelihoodCond: update edge likelihood for a block that +// ends in a conditional branch +// +// Arguments; +// block -- block in question (BBJ_COND) +// +void 
ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block) +{ + BasicBlock* const jump = block->bbJumpDest; + BasicBlock* const next = block->bbNext; + + // Watch for degenerate case + // + if (jump == next) + { + AssignLikelihoodNext(block); + return; + } + + FlowEdge* const jumpEdge = m_comp->fgGetPredForBlock(jump, block); + FlowEdge* const nextEdge = m_comp->fgGetPredForBlock(next, block); + + // THROW heuristic + // + bool const isJumpThrow = (jump->bbJumpKind == BBJ_THROW); + bool const isNextThrow = (next->bbJumpKind == BBJ_THROW); + + if (isJumpThrow != isNextThrow) + { + if (isJumpThrow) + { + jumpEdge->setLikelihood(0.0); + nextEdge->setLikelihood(1.0); + } + else + { + jumpEdge->setLikelihood(1.0); + nextEdge->setLikelihood(0.0); + } + + return; + } + + // LOOP BACK EDGE heuristic + // + bool const isJumpEdgeBackEdge = IsLoopBackEdge(jumpEdge); + bool const isNextEdgeBackEdge = IsLoopBackEdge(nextEdge); + + if (isJumpEdgeBackEdge != isNextEdgeBackEdge) + { + if (isJumpEdgeBackEdge) + { + JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, jump->bbNum); + jumpEdge->setLikelihood(0.9); + nextEdge->setLikelihood(0.1); + } + else + { + JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, next->bbNum); + jumpEdge->setLikelihood(0.1); + nextEdge->setLikelihood(0.9); + } + + return; + } + + // LOOP EXIT EDGE heuristic + // + // Consider: adjust probability if loop has multiple exit edges, so that + // overall exit probability is around 0.1. 
+ // + bool const isJumpEdgeExitEdge = IsLoopExitEdge(jumpEdge); + bool const isNextEdgeExitEdge = IsLoopExitEdge(nextEdge); + + if (isJumpEdgeExitEdge != isNextEdgeExitEdge) + { + if (isJumpEdgeExitEdge) + { + JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, jump->bbNum); + jumpEdge->setLikelihood(0.1); + nextEdge->setLikelihood(0.9); + } + else + { + JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, next->bbNum); + jumpEdge->setLikelihood(0.9); + nextEdge->setLikelihood(0.1); + } + + return; + } + + // RETURN heuristic + // + bool const isJumpReturn = (jump->bbJumpKind == BBJ_RETURN); + bool const isNextReturn = (next->bbJumpKind == BBJ_RETURN); + + if (isJumpReturn != isNextReturn) + { + if (isJumpReturn) + { + jumpEdge->setLikelihood(0.2); + nextEdge->setLikelihood(0.8); + } + else + { + jumpEdge->setLikelihood(0.8); + nextEdge->setLikelihood(0.2); + } + + return; + } + + // IL OFFSET heuristic + // + // Give slight preference to bbNext + // + jumpEdge->setLikelihood(0.48); + nextEdge->setLikelihood(0.52); +} + +//------------------------------------------------------------------------ +// AssignLikelihoodCond: update edge likelihood for a block that +// ends in a switch +// +// Arguments; +// block -- block in question (BBJ_SWITCH) +// +void ProfileSynthesis::AssignLikelihoodSwitch(BasicBlock* block) +{ + // Assume each switch case is equally probable + // + const unsigned n = block->NumSucc(); + const weight_t p = 1 / (weight_t)n; + + // Each unique edge gets some multiple of that basic probability + // + for (BasicBlock* const succ : block->Succs(m_comp)) + { + FlowEdge* const edge = m_comp->fgGetPredForBlock(succ, block); + edge->setLikelihood(p * edge->getDupCount()); + } +} + +//------------------------------------------------------------------------ +// fgBlendLikelihoods: blend existing and heuristic likelihoods for all blocks +// +void ProfileSynthesis::BlendLikelihoods() +{ +} + +void ProfileSynthesis::ClearLikelihoods() 
+{ +} + +void ProfileSynthesis::ReverseLikelihoods() +{ +} + +void ProfileSynthesis::RandomizeLikelihoods() +{ +} + +//------------------------------------------------------------------------ +// fgBuildReversePostorder: compute depth first spanning tree and pre +// and post numbers for the blocks +// +void ProfileSynthesis::BuildReversePostorder() +{ + m_comp->EnsureBasicBlockEpoch(); + m_comp->fgBBReversePostorder = new (m_comp, CMK_Pgo) BasicBlock*[m_comp->fgBBNumMax + 1]{}; + m_comp->fgComputeEnterBlocksSet(); + m_comp->fgDfsReversePostorder(); + + // Build map from bbNum to Block*. + // + m_bbNumToBlockMap = new (m_comp, CMK_Pgo) BasicBlock*[m_comp->fgBBNumMax + 1]{}; + for (BasicBlock* const block : m_comp->Blocks()) + { + m_bbNumToBlockMap[block->bbNum] = block; + } + +#ifdef DEBUG + if (m_comp->verbose) + { + printf("\nAfter doing a post order traversal of the BB graph, this is the ordering:\n"); + for (unsigned i = 1; i <= m_comp->fgBBNumMax; ++i) + { + printf("%02u -> " FMT_BB "\n", i, m_comp->fgBBReversePostorder[i]->bbNum); + } + printf("\n"); + } +#endif // DEBUG +} + +//------------------------------------------------------------------------ +// FindLoops: locate and classify loops +// +void ProfileSynthesis::FindLoops() +{ + CompAllocator allocator = m_comp->getAllocator(CMK_Pgo); + m_loops = new (allocator) LoopVector(allocator); + unsigned improperLoopCount = 0; + + // Identify loops + // + for (unsigned i = 1; i <= m_comp->fgBBNumMax; i++) + { + BasicBlock* const block = m_comp->fgBBReversePostorder[i]; + + // If a block is a DFS ancestor of one if its predecessors then the block is a loop header. 
+ // + SimpleLoop* loop = nullptr; + + for (FlowEdge* predEdge : block->PredEdges()) + { + if (IsDfsAncestor(block, predEdge->getSourceBlock())) + { + if (loop == nullptr) + { + loop = new (allocator) SimpleLoop(block, allocator); + JITDUMP("\n"); + } + + JITDUMP(FMT_BB " -> " FMT_BB " is a backedge\n", predEdge->getSourceBlock()->bbNum, block->bbNum); + loop->m_backEdges.push_back(predEdge); + } + } + + if (loop == nullptr) + { + continue; + } + + JITDUMP(FMT_BB " is head of a DFS loop with %d back edges\n", block->bbNum, loop->m_backEdges.size()); + + // Now walk back in flow along the back edges from block to determine if + // this is a natural loop and to find all the blocks in the loop. + // + loop->m_blocks = BlockSetOps::MakeEmpty(m_comp); + BlockSetOps::AddElemD(m_comp, loop->m_blocks, block->bbNum); + + // todo: hoist this out and just do a reset here + jitstd::list<BasicBlock*> worklist(allocator); + + // Seed the worklist + // + for (FlowEdge* backEdge : loop->m_backEdges) + { + BasicBlock* const backEdgeSource = backEdge->getSourceBlock(); + + if (BlockSetOps::IsMember(m_comp, loop->m_blocks, backEdgeSource->bbNum)) + { + continue; + } + + worklist.push_back(backEdgeSource); + } + + bool isNaturalLoop = true; + + // Work back through flow to loop head or to another pred + // that is clearly outside the loop. + // + // TODO: verify that we can indeed get back to the loop head + // and not get stopped somewhere (eg loop through EH). + // + while (!worklist.empty() && isNaturalLoop) + { + BasicBlock* const loopBlock = worklist.back(); + worklist.pop_back(); + BlockSetOps::AddElemD(m_comp, loop->m_blocks, loopBlock->bbNum); + + for (FlowEdge* const predEdge : loopBlock->PredEdges()) + { + BasicBlock* const predBlock = predEdge->getSourceBlock(); + + // `block` cannot dominate `predBlock` unless it is a DFS ancestor. + // + if (!IsDfsAncestor(block, predBlock)) + { + // Does this represent flow out of some handler? + // If so we will ignore it. 
+ // + // Might want to vet that handler's try region entry + // is a dfs ancestor...? + // + if (!BasicBlock::sameHndRegion(block, predBlock)) + { + continue; + } + + JITDUMP("Loop is not natural; witness " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, + loopBlock->bbNum); + + isNaturalLoop = false; + improperLoopCount++; + break; + } + + if (BlockSetOps::IsMember(m_comp, loop->m_blocks, predBlock->bbNum)) + { + continue; + } + + worklist.push_back(predBlock); + } + } + + if (!isNaturalLoop) + { + continue; + } + + JITDUMP("Loop has %d blocks\n", BlockSetOps::Count(m_comp, loop->m_blocks)); + + // Find the exit edges + // + BlockSetOps::Iter iter(m_comp, loop->m_blocks); + unsigned bbNum = 0; + while (iter.NextElem(&bbNum)) + { + BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum]; + + for (BasicBlock* const succBlock : loopBlock->Succs()) + { + if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, succBlock->bbNum)) + { + FlowEdge* const exitEdge = m_comp->fgGetPredForBlock(succBlock, loopBlock); + JITDUMP(FMT_BB " -> " FMT_BB " is an exit edge\n", loopBlock->bbNum, succBlock->bbNum); + loop->m_exitEdges.push_back(exitEdge); + } + } + } + + // Find the entry edges + // + // Note if fgEntryBB is a loop head we won't have an entry edge. + // So it needs to be special cased later on when processing + // entry edges. + // + for (FlowEdge* const predEdge : loop->m_head->PredEdges()) + { + if (!IsDfsAncestor(block, predEdge->getSourceBlock())) + { + JITDUMP(FMT_BB " -> " FMT_BB " is an entry edge\n", predEdge->getSourceBlock()->bbNum, + loop->m_head->bbNum); + loop->m_entryEdges.push_back(predEdge); + } + } + + // Search for parent loop, validate proper nesting. + // + // Since loops record in outer->inner order the parent will be the + // most recently recorded loop that contains this loop's header. 
+ // + for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it) + { + SimpleLoop* const otherLoop = *it; + + if (BlockSetOps::IsMember(m_comp, otherLoop->m_blocks, block->bbNum)) + { + // Ancestor loop; should contain all blocks of this loop + // + assert(BlockSetOps::IsSubset(m_comp, loop->m_blocks, otherLoop->m_blocks)); + + if (loop->m_parent == nullptr) + { + loop->m_parent = otherLoop; + loop->m_depth = otherLoop->m_depth + 1; + JITDUMP("at depth %u, nested within loop starting at " FMT_BB "\n", loop->m_depth, + otherLoop->m_head->bbNum); + + // Note we could break here but that would bypass the non-overlap check + // just below, so for now we check against all known loops. + } + } + else + { + // Non-ancestor loop; should have no blocks in common with current loop + // + assert(BlockSetOps::IsEmptyIntersection(m_comp, loop->m_blocks, otherLoop->m_blocks)); + } + } + + if (loop->m_parent == nullptr) + { + JITDUMP("top-level loop\n"); + loop->m_depth = 1; + } + + // Record this loop + // + m_loops->push_back(loop); + } + + if (m_loops->size() > 0) + { + JITDUMP("\nFound %d loops\n", m_loops->size()); + } + + if (improperLoopCount > 0) + { + JITDUMP("Rejected %d loops\n", improperLoopCount); + } +} + +//------------------------------------------------------------------------ +// FindCyclicProbabilities: for each loop, compute how much flow returns +// to the loop head given one external count. +// +void ProfileSynthesis::ComputeCyclicProbabilities() +{ + // We found loop walking in reverse postorder, so the loop vector + // is naturally organized with outer loops before inner. + // + // Walk it backwards here so we compute inner loop cyclic probabilities + // first. We rely on that when processing outer loops. 
+ // + for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it) + { + SimpleLoop* const loop = *it; + ComputeCyclicProbabilities(loop); + } +} + +//------------------------------------------------------------------------ +// FindCyclicProbabilities: for a given loop, compute how much flow returns +// to the loop head given one external count. +// +void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) +{ + // Initialize + // + BlockSetOps::Iter iter(m_comp, loop->m_blocks); + unsigned bbNum = 0; + while (iter.NextElem(&bbNum)) + { + BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum]; + loopBlock->bbWeight = 0.0; + } + + // Process loop blocks in RPO. Just takes one pass through the loop blocks + // as any cyclic contributions are handled by cyclic probabilities. + // + for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++) + { + BasicBlock* const block = m_comp->fgBBReversePostorder[i]; + + if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, block->bbNum)) + { + continue; + } + + // Loop head gets external count of 1 + // + if (block == loop->m_head) + { + JITDUMP("ccp: " FMT_BB " :: 1.0\n", block->bbNum); + block->bbWeight = 1.0; + } + else + { + SimpleLoop* nestedLoop = IsLoopHeader(block); + + if (nestedLoop != nullptr) + { + // We should have figured this out already. 
+ // + assert(nestedLoop->m_cyclicProbability != 0); + + // Sum entry edges, multiply by Cp + // + weight_t newWeight = 0.0; + + for (FlowEdge* const edge : nestedLoop->m_entryEdges) + { + if (!BasicBlock::sameHndRegion(block, edge->getSourceBlock())) + { + continue; + } + + newWeight += edge->getLikelyWeight(); + } + + newWeight *= nestedLoop->m_cyclicProbability; + block->bbWeight = newWeight; + + JITDUMP("ccp (nested header): " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight); + } + else + { + weight_t newWeight = 0.0; + + for (FlowEdge* const edge : block->PredEdges()) + { + if (!BasicBlock::sameHndRegion(block, edge->getSourceBlock())) + { + continue; + } + + newWeight += edge->getLikelyWeight(); + } + + block->bbWeight = newWeight; + + JITDUMP("ccp: " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight); + } + } + } + + // Now look at cyclic flow back to the head block. + // + weight_t cyclicWeight = 0; + bool capped = false; + + for (FlowEdge* const edge : loop->m_backEdges) + { + JITDUMP("ccp backedge " FMT_BB " (" FMT_WT ") -> " FMT_BB " likelihood " FMT_WT "\n", + edge->getSourceBlock()->bbNum, edge->getSourceBlock()->bbWeight, loop->m_head->bbNum, + edge->getLikelihood()); + + cyclicWeight += edge->getLikelyWeight(); + } + + // Allow for a bit of rounding error, but not too much. + // (todo: decrease loop gain if we are in a deep nest?) + // assert(cyclicWeight <= 1.01); + // + if (cyclicWeight > 0.999) + { + capped = true; + cyclicWeight = 0.999; + } + + weight_t cyclicProbability = 1.0 / (1.0 - cyclicWeight); + + JITDUMP("For loop at " FMT_BB " cyclic weight is " FMT_WT " cyclic probability is " FMT_WT "%s\n", + loop->m_head->bbNum, cyclicWeight, cyclicProbability, capped ? 
" [capped]" : ""); + + loop->m_cyclicProbability = cyclicProbability; +} + +//------------------------------------------------------------------------ +// fgAssignInputWeights: provide initial profile weights for all blocks +// +// +// Notes: +// For finallys we may want to come back and rescale once we know the +// weights of all the callfinallies, or perhaps just use the weight +// of the first block in the associated try. +// +void ProfileSynthesis::AssignInputWeights() +{ + const weight_t entryWeight = BB_UNITY_WEIGHT; + const weight_t ehWeight = entryWeight / 1000; + + for (BasicBlock* block : m_comp->Blocks()) + { + block->bbWeight = 0.0; + } + + m_comp->fgFirstBB->bbWeight = entryWeight; + + for (EHblkDsc* const HBtab : EHClauses(m_comp)) + { + if (HBtab->HasFilter()) + { + HBtab->ebdFilter->bbWeight = ehWeight; + } + + HBtab->ebdHndBeg->bbWeight = ehWeight; + } +} + +//------------------------------------------------------------------------ +// ComputeBlockWeights: compute weights for all blocks +// based on input weights, edge likelihoods, and cyclic probabilities +// +void ProfileSynthesis::ComputeBlockWeights() +{ + JITDUMP("Computing block weights\n"); + + for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++) + { + BasicBlock* const block = m_comp->fgBBReversePostorder[i]; + SimpleLoop* loop = IsLoopHeader(block); + + if (loop != nullptr) + { + // Start with initial weight, sum entry edges, multiply by Cp + // + weight_t newWeight = block->bbWeight; + + for (FlowEdge* const edge : loop->m_entryEdges) + { + newWeight += edge->getLikelyWeight(); + } + + newWeight *= loop->m_cyclicProbability; + block->bbWeight = newWeight; + + JITDUMP("cbw (header): " FMT_BB " :: " FMT_WT "\n", block->bbNum, block->bbWeight); + } + else + { + // start with initial weight, sum all incoming edges + // + weight_t newWeight = block->bbWeight; + + for (FlowEdge* const edge : block->PredEdges()) + { + newWeight += edge->getLikelyWeight(); + } + + block->bbWeight = newWeight; + 
+ JITDUMP("cbw: " FMT_BB " :: " FMT_WT "\n", block->bbNum, block->bbWeight); + } + + // Todo: just use weight to determine run rarely, not flag + // + if (block->bbWeight == 0.0) + { + block->bbSetRunRarely(); + } + else + { + block->bbFlags &= ~BBF_RUN_RARELY; + } + } +} diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h new file mode 100644 index 0000000000000..22b4b5cba69e4 --- /dev/null +++ b/src/coreclr/jit/fgprofilesynthesis.h @@ -0,0 +1,102 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _FGPROFILESYNTHESIS_H_ +#define _FGPROFILESYNTHESIS_H_ + +#include "compiler.h" +#include "jitstd.h" + +// Flowgraph Profile Synthesis + +typedef jitstd::vector<FlowEdge*> EdgeVector; + +struct SimpleLoop +{ + SimpleLoop(BasicBlock* head, CompAllocator allocator) + : m_head(head) + , m_parent(nullptr) + , m_blocks(BlockSetOps::UninitVal()) + , m_entryEdges(allocator) + , m_backEdges(allocator) + , m_exitEdges(allocator) + , m_cyclicProbability(0) + , m_depth(0) + { + } + + BasicBlock* m_head; + SimpleLoop* m_parent; + BlockSet m_blocks; + EdgeVector m_entryEdges; + EdgeVector m_backEdges; + EdgeVector m_exitEdges; + weight_t m_cyclicProbability; + unsigned m_depth; +}; + +typedef jitstd::vector<SimpleLoop*> LoopVector; + +//------------------------------------------------------------------------ +// ProfileSynthesisOption: specify behavior of profile synthesis +// +enum class ProfileSynthesisOption +{ + AssignLikelihoods, + RetainLikelihoods, + BlendLikelihoods, + ResetAndSynthesize, + ReverseLikelihoods, + RandomLikelihoods, +}; + +//------------------------------------------------------------------------ +// ProfileSynthesis: synthesize, repair, or alter existing profile information +// +class ProfileSynthesis +{ +public: + static void Run(Compiler* compiler, ProfileSynthesisOption option) + { + ProfileSynthesis p(compiler); + p.Run(option); + } + 
+private: + ProfileSynthesis(Compiler* compiler) : m_comp(compiler), m_loops(nullptr), m_bbNumToBlockMap(nullptr) + { + } + + void Run(ProfileSynthesisOption option); + + void BuildReversePostorder(); + static bool IsDfsAncestor(BasicBlock* x, BasicBlock* y); + bool IsLoopBackEdge(FlowEdge* edge); + bool IsLoopExitEdge(FlowEdge* edge); + + void FindLoops(); + SimpleLoop* IsLoopHeader(BasicBlock* block); + + void AssignLikelihoods(); + void AssignLikelihoodNext(BasicBlock* block); + void AssignLikelihoodJump(BasicBlock* block); + void AssignLikelihoodCond(BasicBlock* block); + void AssignLikelihoodSwitch(BasicBlock* block); + void BlendLikelihoods(); + void ClearLikelihoods(); + void ReverseLikelihoods(); + void RandomizeLikelihoods(); + + void ComputeCyclicProbabilities(); + void ComputeCyclicProbabilities(SimpleLoop* loop); + + void AssignInputWeights(); + void ComputeBlockWeights(); + +private: + Compiler* const m_comp; + LoopVector* m_loops; + BasicBlock** m_bbNumToBlockMap; +}; + +#endif // !_FGPROFILESYNTHESIS_H_ diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 4c44490bb35ed..adddbaef7d8a2 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -593,7 +593,10 @@ CONFIG_INTEGER(JitCrossCheckDevirtualizationAndPGO, W("JitCrossCheckDevirtualiza CONFIG_INTEGER(JitNoteFailedExactDevirtualization, W("JitNoteFailedExactDevirtualization"), 0) CONFIG_INTEGER(JitRandomlyCollect64BitCounts, W("JitRandomlyCollect64BitCounts"), 0) // Collect 64-bit counts randomly // for some methods. 
-#endif // debug +// 1: profile synthesis for root methods +// 2: profile synthesis for foot methods w/o pgo data +CONFIG_INTEGER(JitSynthesizeCounts, W("JitSynthesizeCounts"), 0) +#endif // Devirtualize virtual calls with getExactClasses (NativeAOT only for now) CONFIG_INTEGER(JitEnableExactDevirtualization, W("JitEnableExactDevirtualization"), 1) From 84cf8edeec903196f244c8ca2ddbeb159faa6ca1 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 3 Mar 2023 09:47:04 -0800 Subject: [PATCH 2/3] fix --- src/coreclr/jit/block.h | 22 --------- src/coreclr/jit/fgopt.cpp | 95 +++++++++++++++++++++++++-------------- 2 files changed, 61 insertions(+), 56 deletions(-) diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 6b049d00737be..7ae2b6d32cb4f 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -1946,28 +1946,6 @@ inline PredBlockList::iterator& PredBlockList::iterator::operator++() return *this; } -// This enum represents a pre/post-visit action state to emulate a depth-first -// spanning tree traversal of a tree or graph. -enum DfsStackState -{ - DSS_Invalid, // The initialized, invalid error state - DSS_Pre, // The DFS pre-order (first visit) traversal state - DSS_Post // The DFS post-order (last visit) traversal state -}; - -// These structs represents an entry in a stack used to emulate a non-recursive -// depth-first spanning tree traversal of a graph. The entry contains either a -// block pointer or a block number depending on which is more useful. 
-struct DfsBlockEntry -{ - DfsStackState dfsStackState; // The pre/post traversal action for this entry - BasicBlock* dfsBlock; // The corresponding block for the action - - DfsBlockEntry(DfsStackState state, BasicBlock* basicBlock) : dfsStackState(state), dfsBlock(basicBlock) - { - } -}; - /***************************************************************************** * * The following call-backs supplied by the client; it's used by the code diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 1b450dff82fed..70d29f1de0a66 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -880,12 +880,44 @@ void Compiler::fgDfsReversePostorderHelper(BasicBlock* block, // Assume we haven't visited this node yet (callers ensure this). assert(!BlockSetOps::IsMember(this, visited, block->bbNum)); + struct DfsBlockEntry + { + public: + DfsBlockEntry(Compiler* comp, BasicBlock* block) : m_block(block), m_nSucc(block->NumSucc(comp)), m_iter(0) + { + } + + BasicBlock* getBlock() + { + return m_block; + } + + bool atStart() + { + return m_iter == 0; + } + + BasicBlock* getNextSucc(Compiler* comp) + { + if (m_iter >= m_nSucc) + { + return nullptr; + } + return m_block->GetSucc(m_iter++, comp); + } + + private: + BasicBlock* m_block; + unsigned m_nSucc; + unsigned m_iter; + }; + // Allocate a local stack to hold the DFS traversal actions necessary // to compute pre/post-ordering of the control flowgraph. ArrayStack stack(getAllocator(CMK_ArrayStack)); // Push the first block on the stack to seed the traversal. - stack.Push(DfsBlockEntry(DSS_Pre, block)); + stack.Emplace(this, block); // Flag the node we just visited to avoid backtracking. BlockSetOps::AddElemD(this, visited, block->bbNum); @@ -893,51 +925,46 @@ void Compiler::fgDfsReversePostorderHelper(BasicBlock* block, // The search is terminated once all the actions have been processed. 
while (!stack.Empty()) { - DfsBlockEntry current = stack.Pop(); - BasicBlock* currentBlock = current.dfsBlock; + DfsBlockEntry& current = stack.TopRef(); + BasicBlock* const block = current.getBlock(); - if (current.dfsStackState == DSS_Pre) + if (current.atStart()) { - // This is a pre-visit that corresponds to the first time the - // node is encountered in the spanning tree and receives pre-order - // numberings. By pushing the post-action on the stack here we - // are guaranteed to only process it after all of its successors - // pre and post actions are processed. - stack.Push(DfsBlockEntry(DSS_Post, currentBlock)); - currentBlock->bbPreorderNum = preorderIndex; + // Initial visit to this node + // + block->bbPreorderNum = preorderIndex; preorderIndex++; - - for (BasicBlock* const succ : currentBlock->Succs(this)) - { - // If this is a node we haven't seen before, go ahead and process - if (!BlockSetOps::IsMember(this, visited, succ->bbNum)) - { - // Push a pre-visit action for this successor onto the stack and - // mark it as visited in case this block has multiple successors - // to the same node (multi-graph). - stack.Push(DfsBlockEntry(DSS_Pre, succ)); - BlockSetOps::AddElemD(this, visited, succ->bbNum); - } - } } - else - { - // This is a post-visit that corresponds to the last time the - // node is visited in the spanning tree and only happens after - // all descendents in the spanning tree have had pre and post - // actions applied. - assert(current.dfsStackState == DSS_Post); - currentBlock->bbPostorderNum = postorderIndex; + BasicBlock* const succ = current.getNextSucc(this); - // Compute the index of this in the reverse postorder and + if (succ == nullptr) + { + // Final visit to this node + // + block->bbPostorderNum = postorderIndex; + + // Compute the index of block in the reverse postorder and // update the reverse postorder accordingly. 
// assert(postorderIndex <= fgBBcount); unsigned reversePostorderIndex = fgBBcount - postorderIndex + 1; - fgBBReversePostorder[reversePostorderIndex] = currentBlock; + fgBBReversePostorder[reversePostorderIndex] = block; postorderIndex++; + + stack.Pop(); + continue; } + + if (BlockSetOps::IsMember(this, visited, succ->bbNum)) + { + // Already visited this succ + // + continue; + } + + BlockSetOps::AddElemD(this, visited, succ->bbNum); + stack.Emplace(this, succ); } } From bfb77ad94334eba2cccacff1af1cbab0f54d4f72 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 6 Mar 2023 08:46:29 -0800 Subject: [PATCH 3/3] review feedback --- src/coreclr/jit/CMakeLists.txt | 1 + src/coreclr/jit/fgprofile.cpp | 1 - src/coreclr/jit/fgprofilesynthesis.cpp | 20 +++++++++++++------- src/coreclr/jit/fgprofilesynthesis.h | 2 +- src/coreclr/jit/jitconfigvalues.h | 2 +- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index b50590b5b3bd9..480bfdc045e16 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -296,6 +296,7 @@ set( JIT_HEADERS emitjmps.h emitpub.h error.h + fgprofilesynthesis.h gentree.h gentreeopsdef.h gtlist.h diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 8d3d19cbc3130..bd2b1984b57a9 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -2436,7 +2436,6 @@ PhaseStatus Compiler::fgIncorporateProfileData() { JITDUMP("Synthesizing profile data\n"); ProfileSynthesis::Run(this, ProfileSynthesisOption::AssignLikelihoods); - fgPgoHaveWeights = false; return PhaseStatus::MODIFIED_EVERYTHING; } } diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index 77af162739749..4260012df26f3 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -92,11 +92,16 @@ void ProfileSynthesis::Run(ProfileSynthesisOption option) // Compute the 
block weights given the inputs and edge likelihoods // ComputeBlockWeights(); + + // For now, since we have installed synthesized profile data, + // act as if we don't have "real" profile data. + // + m_comp->fgPgoHaveWeights = false; } //------------------------------------------------------------------------ // AssignLikelihoods: update edge likelihoods and block counts based -// entrely on heuristics. +// entirely on heuristics. // // Notes: // any existing likelihoods are removed in favor of heuristic @@ -171,7 +176,7 @@ bool ProfileSynthesis::IsDfsAncestor(BasicBlock* x, BasicBlock* y) } //------------------------------------------------------------------------ -// IsLoopHeader: see if a block is a loop header, and if so return +// GetLoopFromHeader: see if a block is a loop header, and if so return // the associated loop. // // Arguments: @@ -180,7 +185,7 @@ bool ProfileSynthesis::IsDfsAncestor(BasicBlock* x, BasicBlock* y) // Returns: // loop headed by block, or nullptr // -SimpleLoop* ProfileSynthesis::IsLoopHeader(BasicBlock* block) +SimpleLoop* ProfileSynthesis::GetLoopFromHeader(BasicBlock* block) { for (SimpleLoop* loop : *m_loops) { @@ -405,7 +410,7 @@ void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block) } //------------------------------------------------------------------------ -// AssignLikelihoodCond: update edge likelihood for a block that +// AssignLikelihoodSwitch: update edge likelihood for a block that // ends in a switch // // Arguments; @@ -416,6 +421,7 @@ void ProfileSynthesis::AssignLikelihoodSwitch(BasicBlock* block) // Assume each switch case is equally probable // const unsigned n = block->NumSucc(); + assert(n != 0); const weight_t p = 1 / (weight_t)n; // Each unique edge gets some multiple of that basic probability @@ -607,7 +613,7 @@ void ProfileSynthesis::FindLoops() { BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum]; - for (BasicBlock* const succBlock : loopBlock->Succs()) + for (BasicBlock* const succBlock : 
loopBlock->Succs(m_comp)) { if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, succBlock->bbNum)) { @@ -746,7 +752,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) } else { - SimpleLoop* nestedLoop = IsLoopHeader(block); + SimpleLoop* const nestedLoop = GetLoopFromHeader(block); if (nestedLoop != nullptr) { @@ -869,7 +875,7 @@ void ProfileSynthesis::ComputeBlockWeights() for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++) { BasicBlock* const block = m_comp->fgBBReversePostorder[i]; - SimpleLoop* loop = IsLoopHeader(block); + SimpleLoop* const loop = GetLoopFromHeader(block); if (loop != nullptr) { diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h index 22b4b5cba69e4..393f332089c7f 100644 --- a/src/coreclr/jit/fgprofilesynthesis.h +++ b/src/coreclr/jit/fgprofilesynthesis.h @@ -75,7 +75,7 @@ class ProfileSynthesis bool IsLoopExitEdge(FlowEdge* edge); void FindLoops(); - SimpleLoop* IsLoopHeader(BasicBlock* block); + SimpleLoop* GetLoopFromHeader(BasicBlock* block); void AssignLikelihoods(); void AssignLikelihoodNext(BasicBlock* block); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 84a62465e7732..a1305299854ee 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -593,7 +593,7 @@ CONFIG_INTEGER(JitNoteFailedExactDevirtualization, W("JitNoteFailedExactDevirtua CONFIG_INTEGER(JitRandomlyCollect64BitCounts, W("JitRandomlyCollect64BitCounts"), 0) // Collect 64-bit counts randomly // for some methods. // 1: profile synthesis for root methods -// 2: profile synthesis for foot methods w/o pgo data +// 2: profile synthesis for root methods w/o pgo data CONFIG_INTEGER(JitSynthesizeCounts, W("JitSynthesizeCounts"), 0) #endif