Skip to content

Commit

Permalink
cmd/compile,cmd/preprofile: move logic to shared common package
Browse files Browse the repository at this point in the history
The processing performed in cmd/preprofile is a simple version of the
same initial processing performed by cmd/compile/internal/pgo. Refactor
this processing into the new IR-independent cmd/internal/pgo package.

Now cmd/preprofile and cmd/compile run the same code for initial
processing of a pprof profile, guaranteeing that they always stay in
sync.

Since the shared package makes it trivial to do so, this CL also makes
one change to the serialization format: the entries are ordered by
weight. This allows us to avoid sorting ByWeight on deserialization.

Impact on PGO parsing when compiling cmd/compile with PGO:

* Without preprocessing: PGO parsing ~13.7% of CPU time
* With preprocessing (unsorted): ~2.9% of CPU time (of which sorting is ~1.7%)
* With preprocessing (sorted): ~1.3% of CPU time

The remaining 1.3% of CPU time approximately breaks down as:

* ~0.5% parsing the preprocessed profile
* ~0.7% building weighted IR call graph
  * ~0.5% walking function IR to find direct calls
  * ~0.2% performing lookups for indirect call targets

For #58102.

Change-Id: Iaba425ea30b063ca195fb2f7b29342961c8a64c2
Reviewed-on: https://go-review.googlesource.com/c/go/+/569337
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
  • Loading branch information
prattmic authored and gopherbot committed Mar 27, 2024
1 parent 2860e01 commit 63deaf0
Show file tree
Hide file tree
Showing 16 changed files with 685 additions and 443 deletions.
25 changes: 13 additions & 12 deletions src/cmd/compile/internal/devirtualize/pgo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ package devirtualize
import (
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
"cmd/compile/internal/pgo"
pgoir "cmd/compile/internal/pgo"
"cmd/compile/internal/typecheck"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/pgo"
"cmd/internal/src"
"testing"
)
Expand All @@ -32,32 +33,32 @@ func makePos(b *src.PosBase, line, col uint) src.XPos {
}

type profileBuilder struct {
p *pgo.Profile
p *pgoir.Profile
}

func newProfileBuilder() *profileBuilder {
// findHotConcreteCallee only uses pgo.Profile.WeightedCG, so we're
// findHotConcreteCallee only uses pgoir.Profile.WeightedCG, so we're
// going to take a shortcut and only construct that.
return &profileBuilder{
p: &pgo.Profile{
WeightedCG: &pgo.IRGraph{
IRNodes: make(map[string]*pgo.IRNode),
p: &pgoir.Profile{
WeightedCG: &pgoir.IRGraph{
IRNodes: make(map[string]*pgoir.IRNode),
},
},
}
}

// Profile returns the constructed profile.
func (p *profileBuilder) Profile() *pgo.Profile {
func (p *profileBuilder) Profile() *pgoir.Profile {
return p.p
}

// NewNode creates a new IRNode and adds it to the profile.
//
// fn may be nil, in which case the node will set LinkerSymbolName.
func (p *profileBuilder) NewNode(name string, fn *ir.Func) *pgo.IRNode {
n := &pgo.IRNode{
OutEdges: make(map[pgo.NamedCallEdge]*pgo.IREdge),
func (p *profileBuilder) NewNode(name string, fn *ir.Func) *pgoir.IRNode {
n := &pgoir.IRNode{
OutEdges: make(map[pgo.NamedCallEdge]*pgoir.IREdge),
}
if fn != nil {
n.AST = fn
Expand All @@ -69,13 +70,13 @@ func (p *profileBuilder) NewNode(name string, fn *ir.Func) *pgo.IRNode {
}

// Add a new call edge from caller to callee.
func addEdge(caller, callee *pgo.IRNode, offset int, weight int64) {
func addEdge(caller, callee *pgoir.IRNode, offset int, weight int64) {
namedEdge := pgo.NamedCallEdge{
CallerName: caller.Name(),
CalleeName: callee.Name(),
CallSiteOffset: offset,
}
irEdge := &pgo.IREdge{
irEdge := &pgoir.IREdge{
Src: caller,
Dst: callee,
CallSiteOffset: offset,
Expand Down
35 changes: 18 additions & 17 deletions src/cmd/compile/internal/inline/inl.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ import (
"cmd/compile/internal/inline/inlheur"
"cmd/compile/internal/ir"
"cmd/compile/internal/logopt"
"cmd/compile/internal/pgo"
pgoir "cmd/compile/internal/pgo"
"cmd/compile/internal/typecheck"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/pgo"
)

// Inlining budget parameters, gathered in one place
Expand All @@ -58,11 +59,11 @@ const (
var (
// List of all hot callee nodes.
// TODO(prattmic): Make this non-global.
candHotCalleeMap = make(map[*pgo.IRNode]struct{})
candHotCalleeMap = make(map[*pgoir.IRNode]struct{})

// List of all hot call sites. CallSiteInfo.Callee is always nil.
// TODO(prattmic): Make this non-global.
candHotEdgeMap = make(map[pgo.CallSiteInfo]struct{})
candHotEdgeMap = make(map[pgoir.CallSiteInfo]struct{})

// Threshold in percentage for hot callsite inlining.
inlineHotCallSiteThresholdPercent float64
Expand All @@ -78,7 +79,7 @@ var (
)

// PGOInlinePrologue records the hot callsites from ir-graph.
func PGOInlinePrologue(p *pgo.Profile) {
func PGOInlinePrologue(p *pgoir.Profile) {
if base.Debug.PGOInlineCDFThreshold != "" {
if s, err := strconv.ParseFloat(base.Debug.PGOInlineCDFThreshold, 64); err == nil && s >= 0 && s <= 100 {
inlineCDFHotCallSiteThresholdPercent = s
Expand All @@ -103,7 +104,7 @@ func PGOInlinePrologue(p *pgo.Profile) {
}
// mark hot call sites
if caller := p.WeightedCG.IRNodes[n.CallerName]; caller != nil && caller.AST != nil {
csi := pgo.CallSiteInfo{LineOffset: n.CallSiteOffset, Caller: caller.AST}
csi := pgoir.CallSiteInfo{LineOffset: n.CallSiteOffset, Caller: caller.AST}
candHotEdgeMap[csi] = struct{}{}
}
}
Expand All @@ -120,7 +121,7 @@ func PGOInlinePrologue(p *pgo.Profile) {
// (currently only used in debug prints) (in case of equal weights,
// comparing with the threshold may not accurately reflect which nodes are
// considered hot).
func hotNodesFromCDF(p *pgo.Profile) (float64, []pgo.NamedCallEdge) {
func hotNodesFromCDF(p *pgoir.Profile) (float64, []pgo.NamedCallEdge) {
cum := int64(0)
for i, n := range p.NamedEdgeMap.ByWeight {
w := p.NamedEdgeMap.Weight[n]
Expand All @@ -136,7 +137,7 @@ func hotNodesFromCDF(p *pgo.Profile) (float64, []pgo.NamedCallEdge) {
}

// CanInlineFuncs computes whether a batch of functions are inlinable.
func CanInlineFuncs(funcs []*ir.Func, profile *pgo.Profile) {
func CanInlineFuncs(funcs []*ir.Func, profile *pgoir.Profile) {
if profile != nil {
PGOInlinePrologue(profile)
}
Expand Down Expand Up @@ -224,7 +225,7 @@ func GarbageCollectUnreferencedHiddenClosures() {
// possibility that a call to the function might have its score
// adjusted downwards. If 'verbose' is set, then print a remark where
// we boost the budget due to PGO.
func inlineBudget(fn *ir.Func, profile *pgo.Profile, relaxed bool, verbose bool) int32 {
func inlineBudget(fn *ir.Func, profile *pgoir.Profile, relaxed bool, verbose bool) int32 {
// Update the budget for profile-guided inlining.
budget := int32(inlineMaxBudget)
if profile != nil {
Expand All @@ -246,7 +247,7 @@ func inlineBudget(fn *ir.Func, profile *pgo.Profile, relaxed bool, verbose bool)
// CanInline determines whether fn is inlineable.
// If so, CanInline saves copies of fn.Body and fn.Dcl in fn.Inl.
// fn and fn.Body will already have been typechecked.
func CanInline(fn *ir.Func, profile *pgo.Profile) {
func CanInline(fn *ir.Func, profile *pgoir.Profile) {
if fn.Nname == nil {
base.Fatalf("CanInline no nname %+v", fn)
}
Expand Down Expand Up @@ -451,7 +452,7 @@ type hairyVisitor struct {
extraCallCost int32
usedLocals ir.NameSet
do func(ir.Node) bool
profile *pgo.Profile
profile *pgoir.Profile
}

func (v *hairyVisitor) tooHairy(fn *ir.Func) bool {
Expand Down Expand Up @@ -768,7 +769,7 @@ func IsBigFunc(fn *ir.Func) bool {

// TryInlineCall returns an inlined call expression for call, or nil
// if inlining is not possible.
func TryInlineCall(callerfn *ir.Func, call *ir.CallExpr, bigCaller bool, profile *pgo.Profile) *ir.InlinedCallExpr {
func TryInlineCall(callerfn *ir.Func, call *ir.CallExpr, bigCaller bool, profile *pgoir.Profile) *ir.InlinedCallExpr {
if base.Flag.LowerL == 0 {
return nil
}
Expand Down Expand Up @@ -804,7 +805,7 @@ func TryInlineCall(callerfn *ir.Func, call *ir.CallExpr, bigCaller bool, profile

// inlCallee takes a function-typed expression and returns the underlying function ONAME
// that it refers to if statically known. Otherwise, it returns nil.
func inlCallee(caller *ir.Func, fn ir.Node, profile *pgo.Profile) (res *ir.Func) {
func inlCallee(caller *ir.Func, fn ir.Node, profile *pgoir.Profile) (res *ir.Func) {
fn = ir.StaticValue(fn)
switch fn.Op() {
case ir.OMETHEXPR:
Expand Down Expand Up @@ -877,8 +878,8 @@ func inlineCostOK(n *ir.CallExpr, caller, callee *ir.Func, bigCaller bool) (bool
// We'll also allow inlining of hot functions below inlineHotMaxBudget,
// but only in small functions.

lineOffset := pgo.NodeLineOffset(n, caller)
csi := pgo.CallSiteInfo{LineOffset: lineOffset, Caller: caller}
lineOffset := pgoir.NodeLineOffset(n, caller)
csi := pgoir.CallSiteInfo{LineOffset: lineOffset, Caller: caller}
if _, ok := candHotEdgeMap[csi]; !ok {
// Cold
return false, maxCost, metric
Expand Down Expand Up @@ -1188,17 +1189,17 @@ func isAtomicCoverageCounterUpdate(cn *ir.CallExpr) bool {
return v
}

func PostProcessCallSites(profile *pgo.Profile) {
func PostProcessCallSites(profile *pgoir.Profile) {
if base.Debug.DumpInlCallSiteScores != 0 {
budgetCallback := func(fn *ir.Func, prof *pgo.Profile) (int32, bool) {
budgetCallback := func(fn *ir.Func, prof *pgoir.Profile) (int32, bool) {
v := inlineBudget(fn, prof, false, false)
return v, v == inlineHotMaxBudget
}
inlheur.DumpInlCallSiteScores(profile, budgetCallback)
}
}

func analyzeFuncProps(fn *ir.Func, p *pgo.Profile) {
func analyzeFuncProps(fn *ir.Func, p *pgoir.Profile) {
canInline := func(fn *ir.Func) { CanInline(fn, p) }
budgetForFunc := func(fn *ir.Func) int32 {
return inlineBudget(fn, p, true, false)
Expand Down
Loading

0 comments on commit 63deaf0

Please sign in to comment.