Skip to content

Commit

Permalink
runtime/pprof: add streaming protobuf encoder
Browse files Browse the repository at this point in the history
The existing code builds a full profile in memory.
Then it translates that profile into a data structure (in memory).
Then it marshals that data structure into a protocol buffer (in memory).
Then it gzips that marshaled form into the underlying writer.
So there are three copies of the full profile data in memory
at the same time before we're done. This is obviously dumb.

This CL implements a fully streaming conversion from
the original in-memory profile to the underlying writer.
There is now only one copy of the profile in memory.

For the non-CPU profiles, this is optimal, since we have to
have a full copy in memory to start with.

For the CPU profiles, we could still try to bound the profile
size stored in memory and stream fragments out during
the actual profiling, as Go 1.7 did (with a simpler format),
but so far that hasn't been necessary.

Change-Id: Ic36141021857791bf0cd1fce84178fb5e744b989
Reviewed-on: https://go-review.googlesource.com/37164
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
  • Loading branch information
rsc committed Feb 24, 2017
1 parent 322fff8 commit cbab65f
Show file tree
Hide file tree
Showing 9 changed files with 628 additions and 251 deletions.
13 changes: 6 additions & 7 deletions src/go/build/deps_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,12 @@ var pkgDeps = map[string][]string{
"log": {"L1", "os", "fmt", "time"},

// Packages used by testing must be low-level (L2+fmt).
"regexp": {"L2", "regexp/syntax"},
"regexp/syntax": {"L2"},
"runtime/debug": {"L2", "fmt", "io/ioutil", "os", "time"},
"runtime/pprof/internal/protopprof": {"L2", "fmt", "internal/pprof/profile", "os", "time"},
"runtime/pprof": {"L2", "context", "fmt", "internal/pprof/profile", "os", "runtime/pprof/internal/protopprof", "text/tabwriter", "time"},
"runtime/trace": {"L0"},
"text/tabwriter": {"L2"},
"regexp": {"L2", "regexp/syntax"},
"regexp/syntax": {"L2"},
"runtime/debug": {"L2", "fmt", "io/ioutil", "os", "time"},
"runtime/pprof": {"L2", "compress/gzip", "context", "fmt", "io/ioutil", "os", "text/tabwriter", "time"},
"runtime/trace": {"L0"},
"text/tabwriter": {"L2"},

"testing": {"L2", "flag", "fmt", "internal/race", "os", "runtime/debug", "runtime/pprof", "runtime/trace", "time"},
"testing/iotest": {"L2", "log"},
Expand Down
17 changes: 8 additions & 9 deletions src/runtime/pprof/mprof_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pprof_test
package pprof

import (
"bytes"
"fmt"
"reflect"
"regexp"
"runtime"
. "runtime/pprof"
"testing"
"unsafe"
)
Expand Down Expand Up @@ -86,22 +85,22 @@ func TestMemoryProfiler(t *testing.T) {

tests := []string{
fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof_test\.allocatePersistent1K\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:41
# 0x[0-9,a-f]+ runtime/pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:75
# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:40
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:74
`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),

fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof_test\.allocateTransient1M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:22
# 0x[0-9,a-f]+ runtime/pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:73
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:21
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:72
`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),

fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime/pprof_test\.allocateTransient2M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:28
# 0x[0-9,a-f]+ runtime/pprof_test\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:74
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:27
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:73
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),

fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+
# 0x[0-9,a-f]+ runtime/pprof_test\.allocateReflectTransient\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:49
# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:48
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
}

Expand Down
50 changes: 19 additions & 31 deletions src/runtime/pprof/pprof.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ import (
"bufio"
"bytes"
"fmt"
"internal/pprof/profile"
"io"
"runtime"
"sort"
Expand Down Expand Up @@ -384,35 +383,26 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro
}

// Output profile in protobuf form.
prof := &profile.Profile{
PeriodType: &profile.ValueType{Type: name, Unit: "count"},
Period: 1,
Sample: make([]*profile.Sample, 0, len(keys)),
SampleType: []*profile.ValueType{{Type: name, Unit: "count"}},
}
locMap := make(map[uintptr]*profile.Location)
b := newProfileBuilder(w)
b.pbValueType(tagProfile_PeriodType, name, "count")
b.pb.int64Opt(tagProfile_Period, 1)
b.pbValueType(tagProfile_SampleType, name, "count")

values := []int64{0}
var locs []uint64
for _, k := range keys {
stk := p.Stack(index[k])
c := count[k]
locs := make([]*profile.Location, len(stk))
for i, addr := range stk {
loc := locMap[addr]
if loc == nil {
loc = &profile.Location{
ID: uint64(len(locMap) + 1),
Address: uint64(addr - 1),
}
prof.Location = append(prof.Location, loc)
locMap[addr] = loc
values[0] = int64(count[k])
locs = locs[:0]
for i, addr := range p.Stack(index[k]) {
if false && i > 0 { // TODO: why disabled?
addr--
}
locs[i] = loc
locs = append(locs, b.locForPC(addr))
}
prof.Sample = append(prof.Sample, &profile.Sample{
Location: locs,
Value: []int64{int64(c)},
})
b.pbSample(values, locs, nil)
}
return prof.Write(w)
b.build()
return nil
}

// keysByCount sorts keys with higher counts first, breaking ties by key string order.
Expand Down Expand Up @@ -500,8 +490,7 @@ func writeHeap(w io.Writer, debug int) error {
}

if debug == 0 {
pp := encodeMemProfile(p, int64(runtime.MemProfileRate), time.Now())
return pp.Write(w)
return writeHeapProto(w, p, int64(runtime.MemProfileRate))
}

sort.Slice(p, func(i, j int) bool { return p[i].InUseBytes() > p[j].InUseBytes() })
Expand Down Expand Up @@ -705,7 +694,7 @@ func StartCPUProfile(w io.Writer) error {
func readProfile() (data []uint64, tags []unsafe.Pointer, eof bool)

func profileWriter(w io.Writer) {
b := newProfileBuilder()
b := newProfileBuilder(w)
var err error
for {
time.Sleep(100 * time.Millisecond)
Expand All @@ -717,13 +706,12 @@ func profileWriter(w io.Writer) {
break
}
}
p := b.build()
if err != nil {
// The runtime should never produce an invalid or truncated profile.
// It drops records that can't fit into its log buffers.
panic("runtime/pprof: converting profile: " + err.Error())
}
p.Write(w)
b.build()
cpu.done <- true
}

Expand Down
44 changes: 24 additions & 20 deletions src/runtime/pprof/pprof_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

// +build !nacl

package pprof_test
package pprof

import (
"bytes"
Expand All @@ -16,7 +16,6 @@ import (
"os/exec"
"regexp"
"runtime"
. "runtime/pprof"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -68,14 +67,14 @@ func cpuHog2() {
}

func TestCPUProfile(t *testing.T) {
testCPUProfile(t, []string{"runtime/pprof_test.cpuHog1"}, func(dur time.Duration) {
testCPUProfile(t, []string{"runtime/pprof.cpuHog1"}, func(dur time.Duration) {
cpuHogger(cpuHog1, dur)
})
}

func TestCPUProfileMultithreaded(t *testing.T) {
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
testCPUProfile(t, []string{"runtime/pprof_test.cpuHog1", "runtime/pprof_test.cpuHog2"}, func(dur time.Duration) {
testCPUProfile(t, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog2"}, func(dur time.Duration) {
c := make(chan int)
go func() {
cpuHogger(cpuHog1, dur)
Expand Down Expand Up @@ -171,21 +170,26 @@ func profileOk(t *testing.T, need []string, prof bytes.Buffer, duration time.Dur
// Check that profile is well formed and contains need.
have := make([]uintptr, len(need))
var samples uintptr
var buf bytes.Buffer
parseProfile(t, prof.Bytes(), func(count uintptr, stk []uintptr) {
fmt.Fprintf(&buf, "%d:", count)
samples += count
for _, pc := range stk {
fmt.Fprintf(&buf, " %#x", pc)
f := runtime.FuncForPC(pc)
if f == nil {
continue
}
fmt.Fprintf(&buf, "(%s)", f.Name())
for i, name := range need {
if strings.Contains(f.Name(), name) {
have[i] += count
}
}
}
fmt.Fprintf(&buf, "\n")
})
t.Logf("total %d CPU profile samples collected", samples)
t.Logf("total %d CPU profile samples collected:\n%s", samples, buf.String())

if samples < 10 && runtime.GOOS == "windows" {
// On some windows machines we end up with
Expand Down Expand Up @@ -361,44 +365,44 @@ func TestBlockProfile(t *testing.T) {
{"chan recv", blockChanRecv, `
[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime\.chanrecv1\+0x[0-9,a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.blockChanRecv\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.blockChanRecv\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{"chan send", blockChanSend, `
[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime\.chansend1\+0x[0-9,a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.blockChanSend\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.blockChanSend\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{"chan close", blockChanClose, `
[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime\.chanrecv1\+0x[0-9,a-f]+ .*/src/runtime/chan.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.blockChanClose\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.blockChanClose\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{"select recv async", blockSelectRecvAsync, `
[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime\.selectgo\+0x[0-9,a-f]+ .*/src/runtime/select.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.blockSelectRecvAsync\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.blockSelectRecvAsync\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{"select send sync", blockSelectSendSync, `
[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ runtime\.selectgo\+0x[0-9,a-f]+ .*/src/runtime/select.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.blockSelectSendSync\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.blockSelectSendSync\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{"mutex", blockMutex, `
[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ sync\.\(\*Mutex\)\.Lock\+0x[0-9,a-f]+ .*/src/sync/mutex\.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.blockMutex\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.blockMutex\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
{"cond", blockCond, `
[0-9]+ [0-9]+ @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
# 0x[0-9,a-f]+ sync\.\(\*Cond\)\.Wait\+0x[0-9,a-f]+ .*/src/sync/cond\.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.blockCond\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof_test\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.blockCond\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
# 0x[0-9,a-f]+ runtime/pprof\.TestBlockProfile\+0x[0-9,a-f]+ .*/src/runtime/pprof/pprof_test.go:[0-9]+
`},
}

Expand Down Expand Up @@ -541,7 +545,7 @@ func TestMutexProfile(t *testing.T) {
if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
t.Errorf("%q didn't match %q", lines[3], r2)
}
r3 := "^#.*runtime/pprof_test.blockMutex.*$"
r3 := "^#.*runtime/pprof.blockMutex.*$"
if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok {
t.Errorf("%q didn't match %q", lines[5], r3)
}
Expand Down
Loading

0 comments on commit cbab65f

Please sign in to comment.