Skip to content

Commit

Permalink
feat: configurable probabilistic tracing of gateway requests
Browse files Browse the repository at this point in the history
  • Loading branch information
aschmahmann committed Jun 7, 2024
1 parent f24ad1f commit 9495073
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 30 deletions.
16 changes: 11 additions & 5 deletions docs/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -299,18 +299,24 @@ specifications. Configuration environment variables are specified in the
[OpenTelemetry Environment Variable Specification] where possible. The
[Boxo Tracing] documentation is the basis for tracing here.

Two major distinctions from the more general tracing enabled in boxo are:
1. When tracing is enabled it is restricted to flows through HTTP Gateway requests,
rather than also included background processes
2. Requests are only traced when there is a `Traceparent` header passed that is valid
According to the [Trace Context] specification
A major distinction from the more general tracing enabled in boxo is that, when
tracing is enabled, it is restricted to flows through HTTP Gateway requests rather
than also including background processes.

Note: requests are also traced when they carry a `Traceparent` header that is valid
according to the [Trace Context] specification, even if the sampling fraction is set to 0.

### `RAINBOW_TRACING_AUTH`

The ability to pass `Traceparent` or `Tracestate` headers is guarded by an
`Authorization` header. The value of the `Authorization` header should match
the value in the `RAINBOW_TRACING_AUTH` environment variable.

### `RAINBOW_SAMPLING_FRACTION`

The fraction (between 0 and 1) of requests that should be sampled.
This is calculated independently of any `Traceparent`-based sampling.

[Boxo Tracing]: https://github.com/ipfs/boxo/blob/main/docs/tracing.md
[Open Telemetry]: https://opentelemetry.io/
[OpenTelemetry Environment Variable Specification]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/sdk-environment-variables.md
Expand Down
18 changes: 17 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ require (
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.3.0 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/gddo v0.0.0-20210115222349-20d68f94ee1f // indirect
Expand Down Expand Up @@ -146,6 +146,22 @@ require (
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/openzipkin/zipkin-go v0.4.3 // indirect
github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect
github.com/pion/datachannel v1.5.6 // indirect
github.com/pion/dtls/v2 v2.2.11 // indirect
github.com/pion/ice/v2 v2.3.24 // indirect
github.com/pion/interceptor v0.1.29 // indirect
github.com/pion/logging v0.2.2 // indirect
github.com/pion/mdns v0.0.12 // indirect
github.com/pion/randutil v0.1.0 // indirect
github.com/pion/rtcp v1.2.14 // indirect
github.com/pion/rtp v1.8.6 // indirect
github.com/pion/sctp v1.8.16 // indirect
github.com/pion/sdp/v3 v3.0.9 // indirect
github.com/pion/srtp/v2 v2.0.18 // indirect
github.com/pion/stun v0.6.1 // indirect
github.com/pion/transport/v2 v2.2.5 // indirect
github.com/pion/turn/v2 v2.1.6 // indirect
github.com/pion/webrtc/v3 v3.2.40 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/polydawn/refmt v0.89.0 // indirect
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiU
github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE=
github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78=
github.com/go-stack/stack v1.6.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0=
Expand Down Expand Up @@ -668,7 +667,6 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
Expand Down
8 changes: 7 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,12 @@ Generate an identity seed and launch a gateway:
EnvVars: []string{"RAINBOW_TRACING_AUTH"},
Usage: "If set the key gates use of the Traceparent header by requiring the key to be passed in the Authorization header",
},
&cli.Float64Flag{
Name: "sampling-fraction",
Value: 0,
EnvVars: []string{"RAINBOW_SAMPLING_FRACTION"},
Usage: "Rate at which to sample gateway requests. Does not include traceheaders which will always sample",
},
}

app.Commands = []*cli.Command{
Expand Down Expand Up @@ -487,7 +493,7 @@ share the same seed as long as the indexes are different.
registerVersionMetric(version)
registerIpfsNodeCollector(gnd)

tp, shutdown, err := newTracerProvider(cctx.Context)
tp, shutdown, err := newTracerProvider(cctx.Context, cctx.Float64("sampling-fraction"))
if err != nil {
return err
}
Expand Down
65 changes: 44 additions & 21 deletions tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"context"
"fmt"
"github.com/ipfs/boxo/tracing"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
Expand All @@ -12,7 +11,7 @@ import (
"strings"
)

func newTracerProvider(ctx context.Context) (traceapi.TracerProvider, func(context.Context) error, error) {
func newTracerProvider(ctx context.Context, traceFraction float64) (traceapi.TracerProvider, func(context.Context) error, error) {
exporters, err := tracing.NewSpanExporters(ctx)
if err != nil {
return nil, nil, err
Expand All @@ -39,33 +38,57 @@ func newTracerProvider(ctx context.Context) (traceapi.TracerProvider, func(conte
return nil, nil, err
}

options = append(options, trace.WithResource(r), trace.WithSampler(RootPrefixSampler{RootPrefix: "Gateway", Next: trace.ParentBased(trace.NeverSample())}))
var baseSampler trace.Sampler
if traceFraction == 0 {
baseSampler = trace.NeverSample()
} else {
baseSampler = trace.TraceIDRatioBased(traceFraction)
}

// Sample all children whose parents are sampled
// Probabilistically sample if the span is a root which is a Gateway request
var sampler trace.Sampler

Check failure on line 50 in tracing.go

View workflow job for this annotation

GitHub Actions / go-check / All

should merge variable declaration with assignment on next line (S1021)
sampler = trace.ParentBased(
CascadingSamplerFunc(func(parameters trace.SamplingParameters) bool {
return !traceapi.SpanContextFromContext(parameters.ParentContext).IsValid()
}, "root sampler",
CascadingSamplerFunc(func(parameters trace.SamplingParameters) bool {
return strings.HasPrefix(parameters.Name, "Gateway")
}, "gateway request sampler",
baseSampler)))
options = append(options, trace.WithResource(r), trace.WithSampler(sampler))

tp := trace.NewTracerProvider(options...)
return tp, tp.Shutdown, nil
}

type RootPrefixSampler struct {
Next trace.Sampler
RootPrefix string
// funcSampler is a sampler whose sampling decision is produced by a function
// value. CascadingSamplerFunc returns it as a trace.Sampler.
type funcSampler struct {
// next is the sampler handed to CascadingSamplerFunc. It is stored here, but
// the methods visible in this file do not read it (the fn closure captures
// the sampler directly).
next trace.Sampler
// fn computes the result returned from ShouldSample.
fn func(trace.SamplingParameters) trace.SamplingResult
// description is the text returned from Description.
description string
}

var _ trace.Sampler = (*RootPrefixSampler)(nil)

func (s RootPrefixSampler) ShouldSample(parameters trace.SamplingParameters) trace.SamplingResult {
parentSpan := traceapi.SpanContextFromContext(parameters.ParentContext)
if !parentSpan.IsValid() && strings.HasPrefix(parameters.Name, s.RootPrefix) {
res := s.Next.ShouldSample(parameters)
return trace.SamplingResult{
Decision: res.Decision,
Attributes: res.Attributes,
Tracestate: res.Tracestate,
}
}
// ShouldSample returns whatever the stored fn callback yields for the given
// sampling parameters; funcSampler adds no logic of its own.
func (f funcSampler) ShouldSample(parameters trace.SamplingParameters) trace.SamplingResult {
return f.fn(parameters)
}

return s.Next.ShouldSample(parameters)
// Description returns the description string stored on the sampler.
func (f funcSampler) Description() string {
return f.description
}

func (s RootPrefixSampler) Description() string {
return fmt.Sprintf("root prefix sampler: %s", s.RootPrefix)
// CascadingSamplerFunc builds a sampler that gates next behind a predicate.
//
// When shouldSample reports true for a span's sampling parameters, the decision
// is delegated to next. Otherwise the span is dropped, and the result carries
// over the trace state of the span context found in the parent context.
// description is the text the returned sampler reports from Description.
func CascadingSamplerFunc(shouldSample func(parameters trace.SamplingParameters) bool, description string, next trace.Sampler) trace.Sampler {
	decide := func(parameters trace.SamplingParameters) trace.SamplingResult {
		if !shouldSample(parameters) {
			// Predicate rejected the span: drop it, but keep propagating the
			// parent's trace state.
			parent := traceapi.SpanContextFromContext(parameters.ParentContext)
			return trace.SamplingResult{
				Decision:   trace.Drop,
				Tracestate: parent.TraceState(),
			}
		}
		return next.ShouldSample(parameters)
	}

	return funcSampler{
		next:        next,
		fn:          decide,
		description: description,
	}
}

0 comments on commit 9495073

Please sign in to comment.