From 168c3e352c67e9d685c1f2143403b21a4e735768 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Wed, 29 Dec 2021 18:18:32 -0500 Subject: [PATCH] tracing: correct goroutine IDs for async tasks Spans capture the ID of the goroutine that created them, and expose it in instrumentation. This patch makes the goroutine ID mutable, and teaches the Stopper to update it for async tasks. This way, the spans for the async tasks will have the correct goroutine ID - the task's goroutine. Release note: None --- pkg/util/stop/stopper.go | 6 ++++++ pkg/util/tracing/crdbspan.go | 14 ++++++++++++-- pkg/util/tracing/span.go | 11 +++++++++++ pkg/util/tracing/tracer.go | 17 ++++++++--------- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/pkg/util/stop/stopper.go b/pkg/util/stop/stopper.go index ab30e9308d2f..bc0a7ae660b1 100644 --- a/pkg/util/stop/stopper.go +++ b/pkg/util/stop/stopper.go @@ -461,6 +461,11 @@ func (s *Stopper) RunAsyncTaskEx(ctx context.Context, opt TaskOpts, f func(conte } // If the caller has a span, the task gets a child span. + // + // Note that we have to create the child in this parent goroutine; we can't + // defer the creation to the spawned async goroutine since the parent span + // might get Finish()ed by then. However, we'll update the child's goroutine + // ID. 
var sp *tracing.Span switch opt.SpanOpt { case FollowsFromSpan: @@ -480,6 +485,7 @@ func (s *Stopper) RunAsyncTaskEx(ctx context.Context, opt TaskOpts, f func(conte defer s.runPostlude() if sp != nil { defer sp.Finish() + sp.UpdateGoroutineIDToCurrent() } if alloc != nil { defer alloc.Release() diff --git a/pkg/util/tracing/crdbspan.go b/pkg/util/tracing/crdbspan.go index e246967f907c..9cdea40b8ef8 100644 --- a/pkg/util/tracing/crdbspan.go +++ b/pkg/util/tracing/crdbspan.go @@ -35,7 +35,6 @@ type crdbSpan struct { traceID tracingpb.TraceID // probabilistically unique spanID tracingpb.SpanID // probabilistically unique parentSpanID tracingpb.SpanID - goroutineID uint64 operation string // name of operation associated with the span startTime time.Time @@ -66,6 +65,10 @@ type childRef struct { type crdbSpanMu struct { syncutil.Mutex + // goroutineID is the ID of the goroutine that created this span, or the goroutine that + // subsequently adopted it through Span.UpdateGoroutineIDToCurrent(). + goroutineID uint64 + // parent is the span's local parent, if any. parent is not set if the span is // a root or the parent span is remote. // @@ -531,7 +534,7 @@ func (s *crdbSpan) getRecordingNoChildrenLocked( TraceID: s.traceID, SpanID: s.spanID, ParentSpanID: s.parentSpanID, - GoroutineID: s.goroutineID, + GoroutineID: s.mu.goroutineID, Operation: s.operation, StartTime: s.startTime, Duration: s.mu.duration, @@ -731,6 +734,13 @@ func (s *crdbSpan) withLock(f func()) { f() } +// setGoroutineID updates the span's goroutine ID. 
+func (s *crdbSpan) setGoroutineID(gid int64) { + s.mu.Lock() + defer s.mu.Unlock() + s.mu.goroutineID = uint64(gid) +} + var sortPool = sync.Pool{ New: func() interface{} { return &Recording{} diff --git a/pkg/util/tracing/span.go b/pkg/util/tracing/span.go index c2c738274f92..70578a035207 100644 --- a/pkg/util/tracing/span.go +++ b/pkg/util/tracing/span.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" + "github.com/petermattis/goid" "go.opentelemetry.io/otel/attribute" oteltrace "go.opentelemetry.io/otel/trace" ) @@ -288,6 +289,16 @@ func (sp *Span) IsSterile() bool { return sp.i.sterile } +// UpdateGoroutineIDToCurrent updates the span's goroutine ID to the current +// goroutine. This should be called when a different goroutine takes ownership +// of a span. +func (sp *Span) UpdateGoroutineIDToCurrent() { + if sp.detectUseAfterFinish() { + return + } + sp.i.crdb.setGoroutineID(goid.Get()) +} + // SpanMeta is information about a Span that is not local to this // process. Typically, SpanMeta is populated from information // about a Span on the other end of an RPC, and is used to derive diff --git a/pkg/util/tracing/tracer.go b/pkg/util/tracing/tracer.go index 1bc4d6aec6ff..5634f71f7159 100644 --- a/pkg/util/tracing/tracer.go +++ b/pkg/util/tracing/tracer.go @@ -857,7 +857,6 @@ child operation: %s, tracer created at: traceID = tracingpb.TraceID(randutil.FastInt63()) } spanID := tracingpb.SpanID(randutil.FastInt63()) - goroutineID := uint64(goid.Get()) // Now allocate the main *Span and contained crdbSpan. // Allocate these together to save on individual allocs. 
@@ -876,15 +875,15 @@ child operation: %s, tracer created at: }{} helper.crdbSpan = crdbSpan{ - tracer: t, - traceID: traceID, - spanID: spanID, - goroutineID: goroutineID, - startTime: startTime, - logTags: opts.LogTags, + tracer: t, + traceID: traceID, + spanID: spanID, + startTime: startTime, + logTags: opts.LogTags, mu: crdbSpanMu{ - duration: -1, // unfinished - tags: helper.tagsAlloc[:0], + duration: -1, // unfinished + tags: helper.tagsAlloc[:0], + goroutineID: uint64(goid.Get()), }, } helper.crdbSpan.operation = opName