diff --git a/Makefile b/Makefile
index e7ef845c988d..96cede686950 100644
--- a/Makefile
+++ b/Makefile
@@ -1211,7 +1211,7 @@ ARCHIVE_EXTRAS = \
$(BUILDINFO) \
$(SQLPARSER_TARGETS) \
$(OPTGEN_TARGETS) \
- pkg/ui/assets.ccl.installed pkg/ui/assets.oss.installed
+ pkg/ui/distccl/assets pkg/ui/distoss/assets pkg/ui/dist_vendor
# TODO(benesch): Make this recipe use `git ls-files --recurse-submodules`
# instead of scripts/ls-files.sh once Git v2.11 is widely deployed.
@@ -1442,7 +1442,7 @@ ui-watch ui-watch-secure: $(UI_CCL_DLLS) pkg/ui/yarn.opt.installed
ui-clean: ## Remove build artifacts.
find pkg/ui/distccl/assets pkg/ui/distoss/assets -mindepth 1 -not -name .gitkeep -delete
rm -rf pkg/ui/assets.ccl.installed pkg/ui/assets.oss.installed
- rm -rf pkg/ui/dist/*
+ rm -rf pkg/ui/dist_vendor/*
rm -f $(UI_PROTOS_CCL) $(UI_PROTOS_OSS)
rm -f pkg/ui/workspaces/db-console/*manifest.json
rm -rf pkg/ui/workspaces/cluster-ui/dist
diff --git a/pkg/server/debug/BUILD.bazel b/pkg/server/debug/BUILD.bazel
index 6f731cd5e7a5..38120156700d 100644
--- a/pkg/server/debug/BUILD.bazel
+++ b/pkg/server/debug/BUILD.bazel
@@ -29,6 +29,8 @@ go_library(
"//pkg/util/log/severity",
"//pkg/util/stop",
"//pkg/util/timeutil",
+ "//pkg/util/tracing",
+ "//pkg/util/tracing/tracingui",
"//pkg/util/uint128",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_pebble//tool",
diff --git a/pkg/server/debug/server.go b/pkg/server/debug/server.go
index 788b99b47b3c..15cdef61d07d 100644
--- a/pkg/server/debug/server.go
+++ b/pkg/server/debug/server.go
@@ -32,6 +32,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/stop"
+ "github.com/cockroachdb/cockroach/pkg/util/tracing"
+ "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingui"
"github.com/cockroachdb/errors"
pebbletool "github.com/cockroachdb/pebble/tool"
metrics "github.com/rcrowley/go-metrics"
@@ -61,14 +63,18 @@ var _ = func() *settings.StringSetting {
// Server serves the /debug/* family of tools.
type Server struct {
- st *cluster.Settings
- mux *http.ServeMux
- spy logSpy
+ ambientCtx log.AmbientContext
+ st *cluster.Settings
+ mux *http.ServeMux
+ spy logSpy
}
// NewServer sets up a debug server.
func NewServer(
- st *cluster.Settings, hbaConfDebugFn http.HandlerFunc, profiler pprofui.Profiler,
+ ambientContext log.AmbientContext,
+ st *cluster.Settings,
+ hbaConfDebugFn http.HandlerFunc,
+ profiler pprofui.Profiler,
) *Server {
mux := http.NewServeMux()
@@ -137,9 +143,10 @@ func NewServer(
})
return &Server{
- st: st,
- mux: mux,
- spy: spy,
+ ambientCtx: ambientContext,
+ st: st,
+ mux: mux,
+ spy: spy,
}
}
@@ -232,6 +239,12 @@ func (ds *Server) RegisterClosedTimestampSideTransport(
})
}
+// RegisterTracez registers the /debug/tracez handler, which renders snapshots
+// of active spans.
+func (ds *Server) RegisterTracez(tr *tracing.Tracer) {
+ tracingui.RegisterHTTPHandlers(ds.ambientCtx, ds.mux, tr)
+}
+
// ServeHTTP serves various tools under the /debug endpoint.
func (ds *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
handler, _ := ds.mux.Handler(r)
diff --git a/pkg/server/server.go b/pkg/server/server.go
index b66783293f9c..4203ed2fa1d7 100644
--- a/pkg/server/server.go
+++ b/pkg/server/server.go
@@ -834,7 +834,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
}
sStatus.setStmtDiagnosticsRequester(sqlServer.execCfg.StmtDiagnosticsRecorder)
sStatus.baseStatusServer.sqlServer = sqlServer
- debugServer := debug.NewServer(st, sqlServer.pgServer.HBADebugFn(), sStatus)
+ debugServer := debug.NewServer(cfg.BaseConfig.AmbientCtx, st, sqlServer.pgServer.HBADebugFn(), sStatus)
node.InitLogger(sqlServer.execCfg)
*lateBoundServer = Server{
@@ -1958,6 +1958,7 @@ func (s *Server) PreStart(ctx context.Context) error {
return errors.Wrapf(err, "failed to register engines with debug server")
}
s.debug.RegisterClosedTimestampSideTransport(s.ctSender, s.node.storeCfg.ClosedTimestampReceiver)
+ s.debug.RegisterTracez(s.cfg.Tracer)
s.ctSender.Run(ctx, state.nodeID)
diff --git a/pkg/server/tenant.go b/pkg/server/tenant.go
index c74b18a3913b..bd7c9a2809bc 100644
--- a/pkg/server/tenant.go
+++ b/pkg/server/tenant.go
@@ -211,7 +211,7 @@ func StartTenant(
)
mux := http.NewServeMux()
- debugServer := debug.NewServer(args.Settings, s.pgServer.HBADebugFn(), s.execCfg.SQLStatusServer)
+ debugServer := debug.NewServer(baseCfg.AmbientCtx, args.Settings, s.pgServer.HBADebugFn(), s.execCfg.SQLStatusServer)
mux.Handle("/", debugServer)
mux.Handle("/_status/", gwMux)
mux.HandleFunc("/health", func(w http.ResponseWriter, req *http.Request) {
diff --git a/pkg/ui/.gitignore b/pkg/ui/.gitignore
index 0d5500eded18..a583d996bffd 100644
--- a/pkg/ui/.gitignore
+++ b/pkg/ui/.gitignore
@@ -14,6 +14,7 @@ assets.oss.installed
# Generated intermediates
.cache-loader
dist*/**
+dist_vendor
!dist*/_empty_assets/assets/index.html
!distccl/assets/.gitkeep
!distccl/distccl.go
diff --git a/pkg/ui/BUILD.bazel b/pkg/ui/BUILD.bazel
index 862ffdf0aed6..144d5d3725a3 100644
--- a/pkg/ui/BUILD.bazel
+++ b/pkg/ui/BUILD.bazel
@@ -1,8 +1,16 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("@build_bazel_rules_nodejs//:index.bzl", "nodejs_binary")
go_library(
name = "ui",
- srcs = ["ui.go"],
+ srcs = [
+ "tracing_ui.go",
+ "ui.go",
+ ],
+ embedsrcs = [
+ "dist_vendor/list.min.js",
+ "templates/tracing/html_template.html",
+ ],
importpath = "github.com/cockroachdb/cockroach/pkg/ui",
visibility = ["//visibility:public"],
deps = [
@@ -35,3 +43,11 @@ EOF
""",
visibility = ["//pkg/ui:__subpackages__"],
)
+
+genrule(
+ name = "listjs",
+ srcs = ["@npm//:node_modules/list.js/dist/list.min.js"],
+ outs = ["dist_vendor/list.min.js"],
+ cmd = "cp ./$(location @npm//:node_modules/list.js/dist/list.min.js) $@",
+ tools = ["@npm//list.js"],
+)
diff --git a/pkg/ui/templates/tracing/html_template.html b/pkg/ui/templates/tracing/html_template.html
new file mode 100644
index 000000000000..f66b7a4d6ad0
--- /dev/null
+++ b/pkg/ui/templates/tracing/html_template.html
@@ -0,0 +1,140 @@
+
+
+
+
+
+
+
+
+
+Take a snapshot of current operations
+
+Stored snapshots (ID: capture time):
+{{$id := .SnapshotID}}
+{{range $i, $s := .AllSnapshots}}
+
+ {{if eq $s.ID $id}}
+ [current] {{$s.ID}}: {{formatTimeNoMillis .CapturedAt}}
+ {{else}}
+ {{$s.ID}}: {{formatTimeNoMillis .CapturedAt}}
+ {{end}}
+
+{{end}}
+
+
+Spans currently open: {{len .SpansList.Spans}}. Snapshot captured at: {{formatTime .CapturedAt}} UTC. Page generated at: {{formatTime .Now}} UTC.
+{{if ne .Err nil}}
+There was an error producing this snapshot; it might be incomplete: {{.Err}}
+{{end}}
+
+
+
+
+
diff --git a/pkg/ui/tracing_ui.go b/pkg/ui/tracing_ui.go
new file mode 100644
index 000000000000..000ee4f99ce7
--- /dev/null
+++ b/pkg/ui/tracing_ui.go
@@ -0,0 +1,36 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package ui
+
+import (
+ "embed"
+ "io/fs"
+)
+
+// This file deals with embedding assets used by /debug/tracez.
+
+//go:embed dist_vendor/list.min.js
+var listJS embed.FS
+
+//go:embed templates/tracing/html_template.html
+// SpansTableTemplateSrc contains a template used by /debug/tracez
+var SpansTableTemplateSrc string
+
+// ListJS exposes list.js package
+var ListJS fs.FS
+
+func init() {
+ f, err := fs.Sub(listJS, "dist_vendor")
+ if err != nil {
+ panic(err)
+ }
+ ListJS = f
+}
diff --git a/pkg/ui/workspaces/db-console/BUILD.bazel b/pkg/ui/workspaces/db-console/BUILD.bazel
index 75b99f44106d..494064a475c6 100644
--- a/pkg/ui/workspaces/db-console/BUILD.bazel
+++ b/pkg/ui/workspaces/db-console/BUILD.bazel
@@ -130,6 +130,7 @@ DEPENDENCIES = [
"@npm//highlight.js",
"@npm//lodash",
"@npm//long",
+ "@npm//list.js",
"@npm//mini-create-react-context",
"@npm//moment",
"@npm//nvd3",
diff --git a/pkg/ui/workspaces/db-console/package.json b/pkg/ui/workspaces/db-console/package.json
index 1d1f0d06e2af..600d687f10b7 100644
--- a/pkg/ui/workspaces/db-console/package.json
+++ b/pkg/ui/workspaces/db-console/package.json
@@ -29,6 +29,7 @@
"connected-react-router": "^6.9.1",
"create-react-context": "^0.3.0",
"highlight.js": "^10.6.0",
+ "list.js": "^2.3.1",
"lodash": "^4.17.21",
"long": "^4.0.0",
"mini-create-react-context": "^0.3.2",
diff --git a/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx b/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx
index 4b827609d132..6bcd38d4dc89 100644
--- a/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx
+++ b/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx
@@ -330,6 +330,7 @@ export default function Debug() {
+
{
new RemoveBrokenDependenciesPlugin(),
new CopyWebpackPlugin([
{ from: path.resolve(__dirname, "favicon.ico"), to: "favicon.ico" },
+ {
+ from: path.resolve(
+ !isBazelBuild ? __dirname : "",
+ !isBazelBuild ? "../.." : "",
+ "node_modules/list.js/dist/list.min.js",
+ ),
+ to: path.resolve(__dirname, "../../dist_vendor/list.min.js"),
+ },
]),
// use WebpackBar instead of webpack dashboard to fit multiple webpack dev server outputs (db-console and cluster-ui)
new WebpackBar({
diff --git a/pkg/ui/yarn-vendor b/pkg/ui/yarn-vendor
index b7cef07d6748..dfceabccc717 160000
--- a/pkg/ui/yarn-vendor
+++ b/pkg/ui/yarn-vendor
@@ -1 +1 @@
-Subproject commit b7cef07d6748b5af824fb1c1af90cc17f3009c56
+Subproject commit dfceabccc717aff312f17423089652e9e697355b
diff --git a/pkg/ui/yarn.lock b/pkg/ui/yarn.lock
index 0295930e2b57..c0198cdf4cb7 100644
--- a/pkg/ui/yarn.lock
+++ b/pkg/ui/yarn.lock
@@ -11461,6 +11461,13 @@ linkify-it@^2.0.0:
dependencies:
uc.micro "^1.0.1"
+list.js@^2.3.1:
+ version "2.3.1"
+ resolved "https://registry.yarnpkg.com/list.js/-/list.js-2.3.1.tgz#48961989ffe52b0505e352f7a521f819f51df7e7"
+ integrity sha512-jnmm7DYpKtH3DxtO1E2VNCC9Gp7Wrp/FWA2JxQrZUhVJ2RCQBd57pCN6W5w6jpsfWZV0PCAbTX2NOPgyFeeZZg==
+ dependencies:
+ string-natural-compare "^2.0.2"
+
listr-silent-renderer@^1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/listr-silent-renderer/-/listr-silent-renderer-1.1.1.tgz#924b5a3757153770bf1a8e3fbf74b8bbf3f9242e"
@@ -16399,6 +16406,11 @@ string-length@^3.1.0:
astral-regex "^1.0.0"
strip-ansi "^5.2.0"
+string-natural-compare@^2.0.2:
+ version "2.0.3"
+ resolved "https://registry.yarnpkg.com/string-natural-compare/-/string-natural-compare-2.0.3.tgz#9dbe1dd65490a5fe14f7a5c9bc686fc67cb9c6e4"
+ integrity sha512-4Kcl12rNjc+6EKhY8QyDVuQTAlMWwRiNbsxnVwBUKFr7dYPQuXVrtNU4sEkjF9LHY0AY6uVbB3ktbkIH4LC+BQ==
+
string-replace-webpack-plugin@^0.1.3:
version "0.1.3"
resolved "https://registry.yarnpkg.com/string-replace-webpack-plugin/-/string-replace-webpack-plugin-0.1.3.tgz#73c657e759d66cfe80ae1e0cf091aa256d0e715c"
diff --git a/pkg/util/stop/stopper.go b/pkg/util/stop/stopper.go
index ab30e9308d2f..bc0a7ae660b1 100644
--- a/pkg/util/stop/stopper.go
+++ b/pkg/util/stop/stopper.go
@@ -461,6 +461,11 @@ func (s *Stopper) RunAsyncTaskEx(ctx context.Context, opt TaskOpts, f func(conte
}
// If the caller has a span, the task gets a child span.
+ //
+ // Note that we have to create the child in this parent goroutine; we can't
+ // defer the creation to the spawned async goroutine since the parent span
+ // might get Finish()ed by then. However, we'll update the child'd goroutine
+ // ID.
var sp *tracing.Span
switch opt.SpanOpt {
case FollowsFromSpan:
@@ -480,6 +485,7 @@ func (s *Stopper) RunAsyncTaskEx(ctx context.Context, opt TaskOpts, f func(conte
defer s.runPostlude()
if sp != nil {
defer sp.Finish()
+ sp.UpdateGoroutineIDToCurrent()
}
if alloc != nil {
defer alloc.Release()
diff --git a/pkg/util/tracing/BUILD.bazel b/pkg/util/tracing/BUILD.bazel
index fa4c8a23bc83..3210ab249ec2 100644
--- a/pkg/util/tracing/BUILD.bazel
+++ b/pkg/util/tracing/BUILD.bazel
@@ -14,6 +14,7 @@ go_library(
"tags.go",
"test_utils.go",
"tracer.go",
+ "tracer_snapshots.go",
"utils.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/util/tracing",
diff --git a/pkg/util/tracing/crdbspan.go b/pkg/util/tracing/crdbspan.go
index 5cb5c93ab98f..40e096255635 100644
--- a/pkg/util/tracing/crdbspan.go
+++ b/pkg/util/tracing/crdbspan.go
@@ -35,7 +35,6 @@ type crdbSpan struct {
traceID tracingpb.TraceID // probabilistically unique
spanID tracingpb.SpanID // probabilistically unique
parentSpanID tracingpb.SpanID
- goroutineID uint64
operation string // name of operation associated with the span
startTime time.Time
@@ -66,6 +65,10 @@ type childRef struct {
type crdbSpanMu struct {
syncutil.Mutex
+ // goroutineID is the ID of the goroutine that created this span, or the goroutine that
+ // subsequently adopted it through Span.UpdateGoroutineIDToCurrent()).
+ goroutineID uint64
+
// parent is the span's local parent, if any. parent is not set if the span is
// a root or the parent span is remote.
//
@@ -527,7 +530,7 @@ func (s *crdbSpan) getRecordingNoChildrenLocked(
TraceID: s.traceID,
SpanID: s.spanID,
ParentSpanID: s.parentSpanID,
- GoroutineID: s.goroutineID,
+ GoroutineID: s.mu.goroutineID,
Operation: s.operation,
StartTime: s.startTime,
Duration: s.mu.duration,
@@ -727,6 +730,13 @@ func (s *crdbSpan) withLock(f func()) {
f()
}
+// setGoroutineID updates the span's goroutine ID.
+func (s *crdbSpan) setGoroutineID(gid int64) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ s.mu.goroutineID = uint64(gid)
+}
+
var sortPool = sync.Pool{
New: func() interface{} {
return &Recording{}
diff --git a/pkg/util/tracing/span.go b/pkg/util/tracing/span.go
index c2c738274f92..70578a035207 100644
--- a/pkg/util/tracing/span.go
+++ b/pkg/util/tracing/span.go
@@ -18,6 +18,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
+ "github.com/petermattis/goid"
"go.opentelemetry.io/otel/attribute"
oteltrace "go.opentelemetry.io/otel/trace"
)
@@ -288,6 +289,16 @@ func (sp *Span) IsSterile() bool {
return sp.i.sterile
}
+// UpdateGoroutineIDToCurrent updates the span's goroutine ID to the current
+// goroutine. This should be called when a different goroutine takes ownership
+// of a span.
+func (sp *Span) UpdateGoroutineIDToCurrent() {
+ if sp.detectUseAfterFinish() {
+ return
+ }
+ sp.i.crdb.setGoroutineID(goid.Get())
+}
+
// SpanMeta is information about a Span that is not local to this
// process. Typically, SpanMeta is populated from information
// about a Span on the other end of an RPC, and is used to derive
diff --git a/pkg/util/tracing/tracer.go b/pkg/util/tracing/tracer.go
index 1bc4d6aec6ff..91021a7ad676 100644
--- a/pkg/util/tracing/tracer.go
+++ b/pkg/util/tracing/tracer.go
@@ -28,6 +28,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/iterutil"
"github.com/cockroachdb/cockroach/pkg/util/netutil/addr"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
+ "github.com/cockroachdb/cockroach/pkg/util/ring"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
@@ -59,6 +60,9 @@ const (
// maxLogsPerSpanExternal limits the number of logs in a Span for external
// tracers (net/trace, OpenTelemetry); use a comfortable limit.
maxLogsPerSpanExternal = 1000
+ // maxSnapshots limits the number of snapshots that a Tracer will hold in
+ // memory. Beyond this limit, each new snapshot evicts the oldest one.
+ maxSnapshots = 10
)
// These constants are used to form keys to represent tracing context
@@ -239,7 +243,14 @@ type Tracer struct {
// activeSpans is a map that references all non-Finish'ed local root spans,
// i.e. those for which no WithParent() option was supplied.
- activeSpansRegistry *spanRegistry
+ activeSpansRegistry *SpanRegistry
+ snapshotsMu struct {
+ syncutil.Mutex
+ // snapshots stores the activeSpansRegistry snapshots taken during the
+ // Tracer's lifetime. The ring buffer will contain snapshots with contiguous
+ // IDs, from the oldest one to + maxSnapshots - 1.
+ snapshots ring.Buffer // snapshotWithID
+ }
testingMu syncutil.Mutex // protects testingRecordAsyncSpans
testingRecordAsyncSpans bool // see TestingRecordAsyncSpans
@@ -263,7 +274,7 @@ type Tracer struct {
stack string
}
-// spanRegistry is a map that references all non-Finish'ed local root spans,
+// SpanRegistry is a map that references all non-Finish'ed local root spans,
// i.e. those for which no WithLocalParent() option was supplied. The
// map is keyed on the span ID, which is deterministically unique.
//
@@ -272,30 +283,30 @@ type Tracer struct {
//
// The map can be introspected by `Tracer.VisitSpans`. A Span can also be
// retrieved from its ID by `Tracer.GetActiveSpanByID`.
-type spanRegistry struct {
+type SpanRegistry struct {
mu struct {
syncutil.Mutex
m map[tracingpb.SpanID]*crdbSpan
}
}
-func makeSpanRegistry() *spanRegistry {
- r := &spanRegistry{}
+func makeSpanRegistry() *SpanRegistry {
+ r := &SpanRegistry{}
r.mu.m = make(map[tracingpb.SpanID]*crdbSpan)
return r
}
-func (r *spanRegistry) removeSpanLocked(id tracingpb.SpanID) {
+func (r *SpanRegistry) removeSpanLocked(id tracingpb.SpanID) {
delete(r.mu.m, id)
}
-func (r *spanRegistry) addSpan(s *crdbSpan) {
+func (r *SpanRegistry) addSpan(s *crdbSpan) {
r.mu.Lock()
defer r.mu.Unlock()
r.addSpanLocked(s)
}
-func (r *spanRegistry) addSpanLocked(s *crdbSpan) {
+func (r *SpanRegistry) addSpanLocked(s *crdbSpan) {
// Ensure that the registry does not grow unboundedly in case there is a leak.
// When the registry reaches max size, each new span added kicks out an
// arbitrary existing span. We rely on map iteration order here to make this
@@ -310,7 +321,7 @@ func (r *spanRegistry) addSpanLocked(s *crdbSpan) {
}
// getSpanByID looks up a span in the registry. Returns nil if not found.
-func (r *spanRegistry) getSpanByID(id tracingpb.SpanID) RegistrySpan {
+func (r *SpanRegistry) getSpanByID(id tracingpb.SpanID) RegistrySpan {
r.mu.Lock()
defer r.mu.Unlock()
crdbSpan, ok := r.mu.m[id]
@@ -321,7 +332,10 @@ func (r *spanRegistry) getSpanByID(id tracingpb.SpanID) RegistrySpan {
return crdbSpan
}
-func (r *spanRegistry) visitSpans(visitor func(span RegistrySpan) error) error {
+// VisitSpans calls the visitor callback for every local root span in the
+// registry. Iterations stops when the visitor returns an error. If that error
+// is iterutils.StopIteration(), then VisitSpans() returns nil.
+func (r *SpanRegistry) VisitSpans(visitor func(span RegistrySpan) error) error {
r.mu.Lock()
sl := make([]*crdbSpan, 0, len(r.mu.m))
for _, sp := range r.mu.m {
@@ -343,7 +357,7 @@ func (r *spanRegistry) visitSpans(visitor func(span RegistrySpan) error) error {
// testingAll returns (pointers to) all the spans in the registry, in an
// arbitrary order. Since spans can generally finish at any point and use of a
// finished span is not permitted, this method is only suitable for tests.
-func (r *spanRegistry) testingAll() []*crdbSpan {
+func (r *SpanRegistry) testingAll() []*crdbSpan {
r.mu.Lock()
defer r.mu.Unlock()
res := make([]*crdbSpan, 0, len(r.mu.m))
@@ -358,7 +372,7 @@ func (r *spanRegistry) testingAll() []*crdbSpan {
// removing the parent from the registry, the children are accessible in the
// registry through that parent; if we didn't do this swap when the parent is
// removed, the children would not be part of the registry anymore.
-func (r *spanRegistry) swap(parentID tracingpb.SpanID, children []*crdbSpan) {
+func (r *SpanRegistry) swap(parentID tracingpb.SpanID, children []*crdbSpan) {
r.mu.Lock()
defer r.mu.Unlock()
r.removeSpanLocked(parentID)
@@ -857,7 +871,6 @@ child operation: %s, tracer created at:
traceID = tracingpb.TraceID(randutil.FastInt63())
}
spanID := tracingpb.SpanID(randutil.FastInt63())
- goroutineID := uint64(goid.Get())
// Now allocate the main *Span and contained crdbSpan.
// Allocate these together to save on individual allocs.
@@ -876,15 +889,15 @@ child operation: %s, tracer created at:
}{}
helper.crdbSpan = crdbSpan{
- tracer: t,
- traceID: traceID,
- spanID: spanID,
- goroutineID: goroutineID,
- startTime: startTime,
- logTags: opts.LogTags,
+ tracer: t,
+ traceID: traceID,
+ spanID: spanID,
+ startTime: startTime,
+ logTags: opts.LogTags,
mu: crdbSpanMu{
- duration: -1, // unfinished
- tags: helper.tagsAlloc[:0],
+ duration: -1, // unfinished
+ tags: helper.tagsAlloc[:0],
+ goroutineID: uint64(goid.Get()),
},
}
helper.crdbSpan.operation = opName
@@ -1139,7 +1152,7 @@ func (t *Tracer) GetActiveSpanByID(spanID tracingpb.SpanID) RegistrySpan {
// VisitSpans invokes the visitor with all active Spans. The function will
// gracefully exit if the visitor returns iterutil.StopIteration().
func (t *Tracer) VisitSpans(visitor func(span RegistrySpan) error) error {
- return t.activeSpansRegistry.visitSpans(visitor)
+ return t.activeSpansRegistry.VisitSpans(visitor)
}
// TestingRecordAsyncSpans is a test-only helper that configures
@@ -1180,6 +1193,11 @@ func (t *Tracer) PanicOnUseAfterFinish() bool {
return t.panicOnUseAfterFinish
}
+// SpanRegistry exports the registry containing all currently-open spans.
+func (t *Tracer) SpanRegistry() *SpanRegistry {
+ return t.activeSpansRegistry
+}
+
// ForkSpan forks the current span, if any[1]. Forked spans "follow from" the
// original, and are typically used to trace operations that may outlive the
// parent (think async tasks). See the package-level documentation for more
diff --git a/pkg/util/tracing/tracer_snapshots.go b/pkg/util/tracing/tracer_snapshots.go
new file mode 100644
index 000000000000..8f4ce784202f
--- /dev/null
+++ b/pkg/util/tracing/tracer_snapshots.go
@@ -0,0 +1,195 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package tracing
+
+import (
+ "bufio"
+ "runtime"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/cockroachdb/cockroach/pkg/util/timeutil"
+ "github.com/cockroachdb/errors"
+)
+
+// SpansSnapshot represents a snapshot of all the open spans at a certain point
+// in time.
+type SpansSnapshot struct {
+ // CapturedAt is the time when the snapshot was collected.
+ CapturedAt time.Time
+ // Traces contains the collected traces. Each "local route" corresponds to one
+ // trace. "Detached recording" spans are included in the parent's trace.
+ Traces []Recording
+ // Stacks is a map from groutine ID to the goroutine's stack trace. All
+ // goroutines running at the time when this snapshot is produced are
+ // represented here. A goroutine referenced by a span's GoroutineID field will
+ // not be present if it has finished since the respective span was started.
+ Stacks map[int]string
+ // Err is set if an error was encountered when producing the snapshot. The
+ // snapshot will be incomplete in this case.
+ Err error
+}
+
+// SnapshotID identifies a spans snapshot. The ID can be used to retrieve a
+// specific snapshot from the Tracer.
+type SnapshotID int
+type snapshotWithID struct {
+ ID SnapshotID
+ SpansSnapshot
+}
+
+// SaveSnapshot collects a snapshot of the currently open spans (i.e. the
+// contents of the Tracer's active spans registry) saves it in-memory in the
+// Tracer. The snapshot's ID is returned; the snapshot can be retrieved
+// subsequently through t.GetSnapshot(id).
+//
+// Snapshots also include a dump of all the goroutine stack traces.
+func (t *Tracer) SaveSnapshot() SnapshotID {
+ snap := t.generateSnapshot()
+ t.snapshotsMu.Lock()
+ defer t.snapshotsMu.Unlock()
+
+ snapshots := &t.snapshotsMu.snapshots
+
+ if snapshots.Len() == maxSnapshots {
+ snapshots.RemoveFirst()
+ }
+ var id SnapshotID
+ if snapshots.Len() == 0 {
+ id = 1
+ } else {
+ id = snapshots.GetLast().(snapshotWithID).ID + 1
+ }
+ snapshots.AddLast(snapshotWithID{
+ ID: id,
+ SpansSnapshot: snap,
+ })
+ return id
+}
+
+var errSnapshotTooOld = errors.Newf(
+ "the requested snapshot is too old and has been deleted. "+
+ "Only the last %d snapshots are stored.", maxSnapshots)
+
+var errSnapshotDoesntExist = errors.New("the requested snapshot doesn't exist")
+
+// GetSnapshot returns the snapshot with the given ID. If the ID is below the
+// minimum stored snapshot, then the requested snapshot must have been
+// garbage-collected and errSnapshotTooOld is returned. If the snapshot id is
+// beyond the maximum stored ID, errSnapshotDoesntExist is returned.
+//
+// Note that SpansSpanshot has an Err field through which errors are returned.
+// In these error cases, the snapshot will be incomplete.
+func (t *Tracer) GetSnapshot(id SnapshotID) (SpansSnapshot, error) {
+ t.snapshotsMu.Lock()
+ defer t.snapshotsMu.Unlock()
+ snapshots := &t.snapshotsMu.snapshots
+
+ if snapshots.Len() == 0 {
+ return SpansSnapshot{}, errSnapshotDoesntExist
+ }
+ minID := snapshots.GetFirst().(snapshotWithID).ID
+ if id < minID {
+ return SpansSnapshot{}, errSnapshotTooOld
+ }
+ maxID := snapshots.GetLast().(snapshotWithID).ID
+ if id > maxID {
+ return SpansSnapshot{}, errSnapshotDoesntExist
+ }
+
+ return snapshots.Get(int(id - minID)).(snapshotWithID).SpansSnapshot, nil
+}
+
+// SnapshotInfo represents minimal info about a stored snapshot, as returned by
+// Tracer.GetSnapshots().
+type SnapshotInfo struct {
+ ID SnapshotID
+ CapturedAt time.Time
+}
+
+// GetSnapshots returns info on all stored span snapshots.
+func (t *Tracer) GetSnapshots() []SnapshotInfo {
+ t.snapshotsMu.Lock()
+ defer t.snapshotsMu.Unlock()
+ snapshots := &t.snapshotsMu.snapshots
+
+ res := make([]SnapshotInfo, snapshots.Len())
+ for i := 0; i < snapshots.Len(); i++ {
+ s := snapshots.Get(i).(snapshotWithID)
+ res[i] = SnapshotInfo{
+ ID: s.ID,
+ CapturedAt: s.CapturedAt,
+ }
+ }
+ return res
+}
+
+// generateSnapshot produces a snapshot of all the currently open spans and
+// all the current goroutine stacktraces.
+//
+// Note that SpansSpanshot has an Err field through which errors are returned.
+// In these error cases, the snapshot will be incomplete.
+func (t *Tracer) generateSnapshot() SpansSnapshot {
+ capturedAt := timeutil.Now()
+ // Collect the traces.
+ traces := make([]Recording, 0, 1000)
+ _ = t.SpanRegistry().VisitSpans(func(sp RegistrySpan) error {
+ rec := sp.GetFullRecording(RecordingVerbose)
+ traces = append(traces, rec)
+ return nil
+ })
+
+ // Collect and parse the goroutine stack traces.
+
+ // We don't know how big the traces are, so grow a few times if they don't
+ // fit. Start large, though.
+ var stacks []byte
+ for n := 1 << 20; /* 1mb */ n <= (1 << 29); /* 512mb */ n *= 2 {
+ stacks = make([]byte, n)
+ nbytes := runtime.Stack(stacks, true /* all */)
+ if nbytes < len(stacks) {
+ break
+ }
+ }
+
+ splits := strings.Split(string(stacks), "\n\n")
+ stackMap := make(map[int]string, len(splits))
+ var parseErr error
+ for _, s := range splits {
+ // Parse the goroutine ID. The first line of each stack is expected to look like:
+ // goroutine 115 [chan receive]:
+ scanner := bufio.NewScanner(strings.NewReader(s))
+ scanner.Split(bufio.ScanWords)
+ // Skip the word "goroutine".
+ if !scanner.Scan() {
+ parseErr = errors.Errorf("unexpected end of string")
+ break
+ }
+ // Scan the goroutine ID.
+ if !scanner.Scan() {
+ parseErr = errors.Errorf("unexpected end of string")
+ break
+ }
+ goroutineID, err := strconv.Atoi(scanner.Text())
+ if err != nil {
+ panic(err)
+ }
+ stackMap[goroutineID] = s
+ }
+
+ return SpansSnapshot{
+ CapturedAt: capturedAt,
+ Traces: traces,
+ Stacks: stackMap,
+ Err: parseErr,
+ }
+}
diff --git a/pkg/util/tracing/tracingpb/recorded_span.go b/pkg/util/tracing/tracingpb/recorded_span.go
index b398962c941d..2bb4862a5104 100644
--- a/pkg/util/tracing/tracingpb/recorded_span.go
+++ b/pkg/util/tracing/tracingpb/recorded_span.go
@@ -25,6 +25,25 @@ type TraceID uint64
// SpanID is a probabilistically-unique span id.
type SpanID uint64
+// Recording represents a group of RecordedSpans rooted at a fixed root span, as
+// returned by GetRecording. Spans are sorted by StartTime.
+type Recording []RecordedSpan
+
+// Less implements sort.Interface.
+func (r Recording) Less(i, j int) bool {
+ return r[i].StartTime.Before(r[j].StartTime)
+}
+
+// Swap implements sort.Interface.
+func (r Recording) Swap(i, j int) {
+ r[i], r[j] = r[j], r[i]
+}
+
+// Len implements sort.Interface.
+func (r Recording) Len() int {
+ return len(r)
+}
+
// LogMessageField is the field name used for the log message in a LogRecord.
const LogMessageField = "event"
diff --git a/pkg/util/tracing/tracingui/BUILD.bazel b/pkg/util/tracing/tracingui/BUILD.bazel
new file mode 100644
index 000000000000..b225899b887e
--- /dev/null
+++ b/pkg/util/tracing/tracingui/BUILD.bazel
@@ -0,0 +1,17 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+ name = "tracingui",
+ srcs = ["span_registry_ui.go"],
+ importpath = "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingui",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//pkg/ui",
+ "//pkg/util/log",
+ "//pkg/util/sysutil",
+ "//pkg/util/timeutil",
+ "//pkg/util/tracing",
+ "//pkg/util/tracing/tracingpb",
+ "@com_github_cockroachdb_errors//:errors",
+ ],
+)
diff --git a/pkg/util/tracing/tracingui/span_registry_ui.go b/pkg/util/tracing/tracingui/span_registry_ui.go
new file mode 100644
index 000000000000..cdf23044f7a0
--- /dev/null
+++ b/pkg/util/tracing/tracingui/span_registry_ui.go
@@ -0,0 +1,277 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package tracingui
+
+import (
+ "context"
+ "fmt"
+ "html/template"
+ "net/http"
+ "net/url"
+ "strconv"
+ "time"
+
+ "github.com/cockroachdb/cockroach/pkg/ui"
+ "github.com/cockroachdb/cockroach/pkg/util/log"
+ "github.com/cockroachdb/cockroach/pkg/util/sysutil"
+ "github.com/cockroachdb/cockroach/pkg/util/timeutil"
+ "github.com/cockroachdb/cockroach/pkg/util/tracing"
+ "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
+ "github.com/cockroachdb/errors"
+)
+
+// This file deals with producing the /debug/tracez page, which lists
+// a snapshot of the spans in the Tracer's active spans registry.
+
+const dateAndtimeFormat = "2006-01-02 15:04:05.000"
+const timeFormat = "15:04:05.000"
+
+// RegisterHTTPHandlers registers the /debug/tracez handlers, and helpers.
+func RegisterHTTPHandlers(ambientCtx log.AmbientContext, mux *http.ServeMux, tr *tracing.Tracer) {
+ fileServer := http.StripPrefix("/debug/assets/", http.FileServer(http.FS(ui.ListJS)))
+ mux.HandleFunc("/debug/tracez",
+ func(w http.ResponseWriter, req *http.Request) {
+ serveHTTP(ambientCtx.AnnotateCtx(context.Background()), w, req, tr)
+ })
+ mux.HandleFunc("/debug/show-trace",
+ func(w http.ResponseWriter, req *http.Request) {
+ serveHTTPTrace(ambientCtx.AnnotateCtx(req.Context()), w, req, tr)
+ })
+ mux.HandleFunc("/debug/assets/list.min.js", fileServer.ServeHTTP)
+}
+
+type pageData struct {
+ SnapshotID tracing.SnapshotID
+ Now time.Time
+ CapturedAt time.Time
+ Err error
+ AllSnapshots []tracing.SnapshotInfo
+ SpansList spansList
+}
+
+type spansList struct {
+ Spans []tracingpb.RecordedSpan
+ // Stacks contains stack traces for the goroutines referenced by the Spans
+ // through their GoroutineID field.
+ Stacks map[int]string // GoroutineID to stack trace
+}
+
+var spansTableTemplate *template.Template
+
+func init() {
+ spansTableTemplate = template.Must(template.New("spans-list").Funcs(
+ template.FuncMap{
+ "formatTime": formatTime,
+ "formatTimeNoMillis": formatTimeNoMillis,
+ "since": func(t time.Time, capturedAt time.Time) string {
+ return fmt.Sprintf("(%s ago)", formatDuration(capturedAt.Sub(t)))
+ },
+ "timeRaw": func(t time.Time) int64 { return t.UnixMicro() },
+ },
+ ).Parse(ui.SpansTableTemplateSrc))
+}
+
+func formatTime(t time.Time) string {
+ t = t.UTC()
+ if t.Truncate(24*time.Hour) == timeutil.Now().Truncate(24*time.Hour) {
+ return t.Format(timeFormat)
+ }
+ return t.Format(dateAndtimeFormat)
+}
+
+func formatTimeNoMillis(t time.Time) string {
+ t = t.UTC()
+ if t.Truncate(24*time.Hour) == timeutil.Now().Truncate(24*time.Hour) {
+ const format = "15:04:05"
+ return t.Format(format)
+ }
+ const format = "2006-01-02 15:04:05"
+ return t.Format(format)
+}
+
+// formatDuration formats a duration in one of the following formats, depending
+// on its magnitude.
+// 0.001s
+// 1.000s
+// 1m01s
+// 1h05m01s
+// 1d02h05m
+func formatDuration(d time.Duration) string {
+ d = d.Round(time.Millisecond)
+ days := d / (24 * time.Hour)
+ d -= days * 24 * time.Hour
+ h := d / time.Hour
+ d -= h * time.Hour
+ m := d / time.Minute
+ d -= m * time.Minute
+ s := d / time.Second
+ d -= s * time.Second
+ millis := d / time.Millisecond
+ if days != 0 {
+ return fmt.Sprintf("%dd%02dh%02dm", days, h, m)
+ }
+ if h != 0 {
+ return fmt.Sprintf("%dh%02dm%02ds", h, m, s)
+ }
+ if m != 0 {
+ return fmt.Sprintf("%dm%02ds", m, s)
+ }
+ return fmt.Sprintf("%d.%03ds", s, millis)
+}
+
+func serveHTTP(ctx context.Context, w http.ResponseWriter, r *http.Request, tr *tracing.Tracer) {
+ w.Header().Set("Content-Type", "text/html; charset=utf-8")
+ var pageErr error
+ if err := r.ParseForm(); err != nil {
+ log.Warningf(ctx, "error parsing /tracez form: %s", err.Error())
+ pageErr = err
+ }
+
+ snapID := r.Form.Get("snap")
+ var snapshot tracing.SpansSnapshot
+ var snapshotID tracing.SnapshotID
+ switch snapID {
+ case "new":
+ // Capture a new snapshot and return a redirect to that snapshot's ID.
+ snapshotID = tr.SaveSnapshot()
+ newURL, err := r.URL.Parse(fmt.Sprintf("?snap=%d", snapshotID))
+ if err != nil {
+ pageErr = err
+ break
+ }
+ http.Redirect(w, r, newURL.String(), http.StatusFound)
+ return
+ case "":
+ default:
+ id, err := strconv.Atoi(snapID)
+ if err != nil {
+ pageErr = errors.Errorf("invalid snapshot ID: %s", snapID)
+ break
+ }
+ snapshotID = tracing.SnapshotID(id)
+ snapshot, err = tr.GetSnapshot(snapshotID)
+ if err != nil {
+ pageErr = err
+ break
+ }
+ }
+
+ // Flatten the recordings.
+ spans := make([]tracingpb.RecordedSpan, 0, len(snapshot.Traces)*3)
+ for _, r := range snapshot.Traces {
+ spans = append(spans, r...)
+ }
+
+ // Copy the stack traces and augment the map.
+ stacks := make(map[int]string, len(snapshot.Stacks))
+ for k, v := range snapshot.Stacks {
+ stacks[k] = v
+ }
+ // Fill in messages for the goroutines for which we don't have a stack trace.
+ for _, s := range spans {
+ gid := int(s.GoroutineID)
+ if _, ok := stacks[gid]; !ok {
+ stacks[gid] = "Goroutine not found. Goroutine must have finished since the span was created."
+ }
+ }
+
+ if pageErr != nil {
+ snapshot.Err = pageErr
+ }
+ err := spansTableTemplate.ExecuteTemplate(w, "spans-list", pageData{
+ Now: timeutil.Now(),
+ CapturedAt: snapshot.CapturedAt,
+ SnapshotID: snapshotID,
+ AllSnapshots: tr.GetSnapshots(),
+ Err: snapshot.Err,
+ SpansList: spansList{
+ Spans: spans,
+ Stacks: stacks,
+ },
+ })
+ if err != nil {
+ // We can get a "connection reset by peer" error if the browser requesting
+ // the page has gone away.
+ if !sysutil.IsErrConnectionReset(err) {
+ log.Warningf(ctx, "error executing tracez template: %s", err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ }
+ }
+}
+
+func serveHTTPTrace(
+ ctx context.Context, w http.ResponseWriter, r *http.Request, tr *tracing.Tracer,
+) {
+ w.Header().Set("Content-Type", "text/html; charset=utf-8")
+ var err error
+ if err = r.ParseForm(); err != nil {
+ goto Error
+ }
+
+ {
+ var traceID tracingpb.TraceID
+ tid := r.Form.Get("trace")
+ if tid == "" {
+ err = errors.Errorf("trace ID missing; use ?trace=")
+ goto Error
+ }
+ var id int
+ id, err = strconv.Atoi(tid)
+ if err != nil {
+ err = errors.Errorf("invalid trace ID: %s", tid)
+ goto Error
+ }
+ traceID = tracingpb.TraceID(id)
+
+ var snapshotID tracing.SnapshotID
+ var snapshot tracing.SpansSnapshot
+ snapID := r.Form.Get("snap")
+ if snapID != "" {
+ var id int
+ id, err = strconv.Atoi(snapID)
+ if err != nil {
+ err = errors.Errorf("invalid snapshot ID: %s", snapID)
+ goto Error
+ }
+ snapshotID = tracing.SnapshotID(id)
+ snapshot, err = tr.GetSnapshot(snapshotID)
+ if err != nil {
+ goto Error
+ }
+ } else {
+ // If no snapshot is specified, we'll take a new one now and redirect to
+ // it.
+ snapshotID = tr.SaveSnapshot()
+ var newURL *url.URL
+ newURL, err = r.URL.Parse(fmt.Sprintf("?trace=%d&snap=%d", traceID, snapshotID))
+ if err != nil {
+ goto Error
+ }
+ http.Redirect(w, r, newURL.String(), http.StatusFound)
+ return
+ }
+
+ for _, r := range snapshot.Traces {
+ if r[0].TraceID == traceID {
+ _, err = w.Write([]byte("" + r.String() + "\n
"))
+ if err != nil {
+ goto Error
+ }
+ return
+ }
+ }
+ err = errors.Errorf("trace %d not found in snapshot", traceID)
+ goto Error
+ }
+
+Error:
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+}