Skip to content

Commit

Permalink
feat: configurable probabilistic tracing of gateway requests
Browse files Browse the repository at this point in the history
  • Loading branch information
aschmahmann committed Jun 24, 2024
1 parent 19e3b26 commit 395c2e1
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 30 deletions.
16 changes: 11 additions & 5 deletions docs/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -299,18 +299,24 @@ specifications. Configuration environment variables are specified in the
[OpenTelemetry Environment Variable Specification] where possible. The
[Boxo Tracing] documentation is the basis for tracing here.

Two major distinctions from the more general tracing enabled in boxo are:
1. When tracing is enabled it is restricted to flows through HTTP Gateway requests,
rather than also included background processes
2. Requests are only traced when there is a `Traceparent` header passed that is valid
According to the [Trace Context] specification
A major distinctions from the more general tracing enabled in boxo is that when
tracing is enabled it is restricted to flows through HTTP Gateway requests, rather
than also included background processes.

Note: requests are also traced when there is a `Traceparent` header passed that is valid
According to the [Trace Context] specification, even if the sampling fraction is set to 0.

### `RAINBOW_TRACING_AUTH`

The ability to pass `Traceparent` or `Tracestate` headers is guarded by an
`Authorization` header. The value of the `Authorization` header should match
the value in the `RAINBOW_TRACING_AUTH` environment variable.

### `RAINBOW_SAMPLING_FRACTION`

The fraction (between 0 and 1) of requests that should be sampled.
This is calculated independently of any Traceparent based sampling.

[Boxo Tracing]: https://github.com/ipfs/boxo/blob/main/docs/tracing.md
[Open Telemetry]: https://opentelemetry.io/
[OpenTelemetry Environment Variable Specification]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/sdk-environment-variables.md
Expand Down
18 changes: 17 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ require (
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.3.0 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/gddo v0.0.0-20210115222349-20d68f94ee1f // indirect
Expand Down Expand Up @@ -146,6 +146,22 @@ require (
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/openzipkin/zipkin-go v0.4.3 // indirect
github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect
github.com/pion/datachannel v1.5.6 // indirect
github.com/pion/dtls/v2 v2.2.11 // indirect
github.com/pion/ice/v2 v2.3.24 // indirect
github.com/pion/interceptor v0.1.29 // indirect
github.com/pion/logging v0.2.2 // indirect
github.com/pion/mdns v0.0.12 // indirect
github.com/pion/randutil v0.1.0 // indirect
github.com/pion/rtcp v1.2.14 // indirect
github.com/pion/rtp v1.8.6 // indirect
github.com/pion/sctp v1.8.16 // indirect
github.com/pion/sdp/v3 v3.0.9 // indirect
github.com/pion/srtp/v2 v2.0.18 // indirect
github.com/pion/stun v0.6.1 // indirect
github.com/pion/transport/v2 v2.2.5 // indirect
github.com/pion/turn/v2 v2.1.6 // indirect
github.com/pion/webrtc/v3 v3.2.40 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/polydawn/refmt v0.89.0 // indirect
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiU
github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE=
github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78=
github.com/go-stack/stack v1.6.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0=
Expand Down Expand Up @@ -668,7 +667,6 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
Expand Down
8 changes: 7 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,12 @@ Generate an identity seed and launch a gateway:
EnvVars: []string{"RAINBOW_TRACING_AUTH"},
Usage: "If set the key gates use of the Traceparent header by requiring the key to be passed in the Authorization header",
},
&cli.Float64Flag{
Name: "sampling-fraction",
Value: 0,
EnvVars: []string{"RAINBOW_SAMPLING_FRACTION"},
Usage: "Rate at which to sample gateway requests. Does not include traceheaders which will always sample",
},
}

app.Commands = []*cli.Command{
Expand Down Expand Up @@ -487,7 +493,7 @@ share the same seed as long as the indexes are different.
registerVersionMetric(version)
registerIpfsNodeCollector(gnd)

tp, shutdown, err := newTracerProvider(cctx.Context)
tp, shutdown, err := newTracerProvider(cctx.Context, cctx.Float64("sampling-fraction"))

Check warning on line 496 in main.go

View check run for this annotation

Codecov / codecov/patch

main.go#L496

Added line #L496 was not covered by tests
if err != nil {
return err
}
Expand Down
64 changes: 43 additions & 21 deletions tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"context"
"fmt"
"github.com/ipfs/boxo/tracing"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
Expand All @@ -12,7 +11,7 @@ import (
"strings"
)

func newTracerProvider(ctx context.Context) (traceapi.TracerProvider, func(context.Context) error, error) {
func newTracerProvider(ctx context.Context, traceFraction float64) (traceapi.TracerProvider, func(context.Context) error, error) {

Check warning on line 14 in tracing.go

View check run for this annotation

Codecov / codecov/patch

tracing.go#L14

Added line #L14 was not covered by tests
exporters, err := tracing.NewSpanExporters(ctx)
if err != nil {
return nil, nil, err
Expand All @@ -39,33 +38,56 @@ func newTracerProvider(ctx context.Context) (traceapi.TracerProvider, func(conte
return nil, nil, err
}

options = append(options, trace.WithResource(r), trace.WithSampler(RootPrefixSampler{RootPrefix: "Gateway", Next: trace.ParentBased(trace.NeverSample())}))
var baseSampler trace.Sampler
if traceFraction == 0 {
baseSampler = trace.NeverSample()
} else {
baseSampler = trace.TraceIDRatioBased(traceFraction)

Check warning on line 45 in tracing.go

View check run for this annotation

Codecov / codecov/patch

tracing.go#L41-L45

Added lines #L41 - L45 were not covered by tests
}

// Sample all children whose parents are sampled
// Probabilistically sample if the span is a root which is a Gateway request
sampler := trace.ParentBased(
CascadingSamplerFunc(func(parameters trace.SamplingParameters) bool {
return !traceapi.SpanContextFromContext(parameters.ParentContext).IsValid()
}, "root sampler",
CascadingSamplerFunc(func(parameters trace.SamplingParameters) bool {
return strings.HasPrefix(parameters.Name, "Gateway")
}, "gateway request sampler",

Check warning on line 56 in tracing.go

View check run for this annotation

Codecov / codecov/patch

tracing.go#L50-L56

Added lines #L50 - L56 were not covered by tests
baseSampler)))
options = append(options, trace.WithResource(r), trace.WithSampler(sampler))

Check warning on line 58 in tracing.go

View check run for this annotation

Codecov / codecov/patch

tracing.go#L58

Added line #L58 was not covered by tests

tp := trace.NewTracerProvider(options...)
return tp, tp.Shutdown, nil
}

type RootPrefixSampler struct {
Next trace.Sampler
RootPrefix string
type funcSampler struct {
next trace.Sampler
fn func(trace.SamplingParameters) trace.SamplingResult
description string
}

var _ trace.Sampler = (*RootPrefixSampler)(nil)

func (s RootPrefixSampler) ShouldSample(parameters trace.SamplingParameters) trace.SamplingResult {
parentSpan := traceapi.SpanContextFromContext(parameters.ParentContext)
if !parentSpan.IsValid() && strings.HasPrefix(parameters.Name, s.RootPrefix) {
res := s.Next.ShouldSample(parameters)
return trace.SamplingResult{
Decision: res.Decision,
Attributes: res.Attributes,
Tracestate: res.Tracestate,
}
}
func (f funcSampler) ShouldSample(parameters trace.SamplingParameters) trace.SamplingResult {
return f.fn(parameters)

Check warning on line 71 in tracing.go

View check run for this annotation

Codecov / codecov/patch

tracing.go#L70-L71

Added lines #L70 - L71 were not covered by tests
}

return s.Next.ShouldSample(parameters)
func (f funcSampler) Description() string {
return f.description

Check warning on line 75 in tracing.go

View check run for this annotation

Codecov / codecov/patch

tracing.go#L74-L75

Added lines #L74 - L75 were not covered by tests
}

func (s RootPrefixSampler) Description() string {
return fmt.Sprintf("root prefix sampler: %s", s.RootPrefix)
// CascadingSamplerFunc will sample with the next tracer if the condition is met, otherwise the sample will be dropped
func CascadingSamplerFunc(shouldSample func(parameters trace.SamplingParameters) bool, description string, next trace.Sampler) trace.Sampler {
return funcSampler{
next: next,
fn: func(parameters trace.SamplingParameters) trace.SamplingResult {
if shouldSample(parameters) {
return next.ShouldSample(parameters)

Check warning on line 84 in tracing.go

View check run for this annotation

Codecov / codecov/patch

tracing.go#L79-L84

Added lines #L79 - L84 were not covered by tests
}
return trace.SamplingResult{
Decision: trace.Drop,
Tracestate: traceapi.SpanContextFromContext(parameters.ParentContext).TraceState(),

Check warning on line 88 in tracing.go

View check run for this annotation

Codecov / codecov/patch

tracing.go#L86-L88

Added lines #L86 - L88 were not covered by tests
}
},
description: description,
}
}

0 comments on commit 395c2e1

Please sign in to comment.