From c4859ec66c953c0e1bf6ad4996c63b4ba16645cf Mon Sep 17 00:00:00 2001 From: Daniel Norman <1992255+2color@users.noreply.github.com> Date: Tue, 19 Nov 2024 10:41:18 +0100 Subject: [PATCH] feat: tracing, improved metrics, go-libp2p-kad-dht and boxo upgrade, request timeouts (#87) * fix: larger duration buckets for better visibility * feat: log accept header * fix: move instrumentation to boxo * feat: add tracing with auth token * feat: add 30 second request timeout * chore: remove replace directive * chore: add missing funcSampler * chore: remove request timeout this isn't working too well. We need to look more deeply into this * chore: update changelog * chore: go mod tidy * chore: go-libp2p-kad-dht v0.28.1 * chore: latest boxo#720 * chore: mod tidy * chore: boxo main with ipfs/boxo#720 and ipfs/boxo#718 * Apply suggestions from code review Co-authored-by: Marcin Rataj * fix: typo --------- Co-authored-by: Daniel N <2color@users.noreply.github.com> Co-authored-by: Marcin Rataj --- CHANGELOG.md | 5 ++ docs/environment-variables.md | 22 +++++++ docs/tracing.md | 39 ++++++++++++ go.mod | 35 ++++++++--- go.sum | 73 +++++++++++++++++----- main.go | 15 +++++ server.go | 52 ++++++++++++---- tracing.go | 114 ++++++++++++++++++++++++++++++++++ 8 files changed, 318 insertions(+), 37 deletions(-) create mode 100644 docs/tracing.md create mode 100644 tracing.go diff --git a/CHANGELOG.md b/CHANGELOG.md index f2201cd..c7caaeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,8 +15,13 @@ The following emojis are used to highlight certain changes: ### Added +- Add request tracing: either sample a fraction of requests, or trace individual requests that send a `Traceparent` header together with a valid `Authorization` token. See [tracing.md](./docs/tracing.md) for more details. 
+ ### Changed +- go-libp2p-kad-dht updated to [v0.28.1](https://github.com/libp2p/go-libp2p-kad-dht/releases/tag/v0.28.1) +- Metrics `someguy_http_request_duration_seconds` and `someguy_http_response_size_bytes` were replaced with `delegated_routing_server_http_request_duration_seconds` and `delegated_routing_server_http_response_size_bytes` from upstream `boxo/routing/http/server`. + ### Removed ### Fixed diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 3c59c66..2c10c44 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -19,6 +19,9 @@ - [`GOLOG_LOG_FMT`](#golog_log_fmt) - [`GOLOG_FILE`](#golog_file) - [`GOLOG_TRACING_FILE`](#golog_tracing_file) +- [Tracing](#tracing) + - [`SOMEGUY_SAMPLING_FRACTION`](#someguy_sampling_fraction) + - [`SOMEGUY_TRACING_AUTH`](#someguy_tracing_auth) ## Configuration @@ -140,3 +143,22 @@ Sets the file to which the logs are saved. By default, they are printed to the s Sets the file to which the tracing events are sent. By default, tracing is disabled. Warning: Enabling tracing will likely affect performance. + +## Tracing + +See [tracing.md](tracing.md). + +### `SOMEGUY_TRACING_AUTH` + +Optional. Setting it to a non-empty value enables on-demand, per-request tracing. + +The ability to pass `Traceparent` or `Tracestate` headers is guarded by an +`Authorization` header. The value of the `Authorization` header must match +the value in the `SOMEGUY_TRACING_AUTH` environment variable. + +### `SOMEGUY_SAMPLING_FRACTION` + +Optional, set to 0 by default. + +The fraction (between 0 and 1) of requests that should be sampled. +This is calculated independently of any `Traceparent`-based sampling. diff --git a/docs/tracing.md b/docs/tracing.md new file mode 100644 index 0000000..a377735 --- /dev/null +++ b/docs/tracing.md @@ -0,0 +1,39 @@ +## Tracing + +Tracing across the stack follows, as much as possible, the [Open Telemetry] +specifications. 
Configuration environment variables are specified in the +[OpenTelemetry Environment Variable Specification] where possible. The +[Boxo Tracing] documentation is the basis for tracing here. + +> [!NOTE] +> A major distinction from the more [general tracing enabled in boxo][Boxo Tracing] is that when +> tracing is enabled it is restricted to flows through HTTP Gateway requests, rather +> than also including background processes. + +### Fractional Sampling + +To sample a fraction of requests, set [`SOMEGUY_SAMPLING_FRACTION`](environment-variables.md#someguy_sampling_fraction) to a value between `0` and `1`. + +### Per Request + +Per-request tracing is possible when a non-empty [`SOMEGUY_TRACING_AUTH`](environment-variables.md#someguy_tracing_auth) is set in Someguy and when both a valid +[Authorization](headers.md#authorization) and a [`Traceparent`](headers.md#traceparent) HTTP header are passed in the request. + +### Per-request tracing example: + +```console +$ export SOMEGUY_TRACING_AUTH=CHANGEME-tracing-auth-secret # use value from Someguy config +$ export CID=bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi +$ curl -H "Authorization: $SOMEGUY_TRACING_AUTH" -H "Traceparent: 00-$(openssl rand -hex 16)-00$(openssl rand -hex 7)-01" http://127.0.0.1:8090/routing/v1/providers/$CID -v -o /dev/null +... +> Authorization: CHANGEME-tracing-auth-secret +> Traceparent: 00-b617dc6b6e302ccbabe0115eac80320b-00033792c7de8fc6-01 +... +``` + +Now you can search for `trace_id = b617dc6b6e302ccbabe0115eac80320b` to find the trace. 
+ +[Boxo Tracing]: https://github.com/ipfs/boxo/blob/main/docs/tracing.md +[Open Telemetry]: https://opentelemetry.io/ +[OpenTelemetry Environment Variable Specification]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/configuration/sdk-environment-variables.md +[Trace Context]: https://www.w3.org/TR/trace-context diff --git a/go.mod b/go.mod index 964b70a..71c327e 100644 --- a/go.mod +++ b/go.mod @@ -7,11 +7,11 @@ require ( github.com/coreos/go-systemd/v22 v22.5.0 github.com/dustin/go-humanize v1.0.1 github.com/felixge/httpsnoop v1.0.4 - github.com/ipfs/boxo v0.24.1 + github.com/ipfs/boxo v0.24.4-0.20241119003055-e38f236348d6 github.com/ipfs/go-cid v0.4.1 github.com/ipfs/go-log/v2 v2.5.1 github.com/libp2p/go-libp2p v0.37.0 - github.com/libp2p/go-libp2p-kad-dht v0.27.0 + github.com/libp2p/go-libp2p-kad-dht v0.28.1 github.com/libp2p/go-libp2p-record v0.2.0 github.com/multiformats/go-multiaddr v0.13.0 github.com/multiformats/go-multibase v0.2.0 @@ -19,10 +19,14 @@ require ( github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 github.com/prometheus/client_golang v1.20.5 github.com/rs/cors v1.11.0 - github.com/slok/go-http-metrics v0.12.0 github.com/stretchr/testify v1.9.0 github.com/urfave/cli/v2 v2.27.3 - golang.org/x/sys v0.26.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 + go.opentelemetry.io/contrib/propagators/autoprop v0.57.0 + go.opentelemetry.io/otel v1.32.0 + go.opentelemetry.io/otel/sdk v1.32.0 + go.opentelemetry.io/otel/trace v1.32.0 + golang.org/x/sys v0.27.0 ) require ( @@ -30,6 +34,7 @@ require ( github.com/andybalholm/brotli v1.1.0 // indirect github.com/benbjohnson/clock v1.3.5 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/containerd/cgroups v1.1.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect @@ -50,6 +55,7 @@ require ( github.com/google/uuid 
v1.6.0 // indirect github.com/gorilla/mux v1.8.1 // indirect github.com/gorilla/websocket v1.5.3 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect @@ -93,6 +99,7 @@ require ( github.com/onsi/ginkgo/v2 v2.20.2 // indirect github.com/opencontainers/runtime-spec v1.2.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect + github.com/openzipkin/zipkin-go v0.4.3 // indirect github.com/pion/datachannel v1.5.9 // indirect github.com/pion/dtls/v2 v2.2.12 // indirect github.com/pion/ice/v2 v2.3.36 // indirect @@ -120,16 +127,25 @@ require ( github.com/quic-go/webtransport-go v0.8.1-0.20241018022711-4ac2c9250e66 // indirect github.com/raulk/go-watchdog v1.3.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect - github.com/samber/lo v1.46.0 // indirect + github.com/samber/lo v1.47.0 // indirect + github.com/slok/go-http-metrics v0.12.0 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect github.com/wlynxg/anet v0.0.5 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/otel v1.28.0 // indirect - go.opentelemetry.io/otel/metric v1.28.0 // indirect - go.opentelemetry.io/otel/trace v1.28.0 // indirect + go.opentelemetry.io/contrib/propagators/aws v1.32.0 // indirect + go.opentelemetry.io/contrib/propagators/b3 v1.32.0 // indirect + go.opentelemetry.io/contrib/propagators/jaeger v1.32.0 // indirect + go.opentelemetry.io/contrib/propagators/ot v1.32.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0 // indirect + 
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 // indirect + go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.31.0 // indirect + go.opentelemetry.io/otel/exporters/zipkin v1.31.0 // indirect + go.opentelemetry.io/otel/metric v1.32.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/dig v1.18.0 // indirect go.uber.org/fx v1.23.0 // indirect go.uber.org/mock v0.5.0 // indirect @@ -143,6 +159,9 @@ require ( golang.org/x/text v0.19.0 // indirect golang.org/x/tools v0.26.0 // indirect gonum.org/v1/gonum v0.15.1 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/grpc v1.67.1 // indirect google.golang.org/protobuf v1.35.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/blake3 v1.3.0 // indirect diff --git a/go.sum b/go.sum index e94cbfd..0d4312f 100644 --- a/go.sum +++ b/go.sum @@ -37,6 +37,8 @@ github.com/btcsuite/snappy-go v0.0.0-20151229074030-0bdef8d06723/go.mod h1:8woku github.com/btcsuite/websocket v0.0.0-20150119174127-31079b680792/go.mod h1:ghJtEyQwv5/p4Mg4C0fgbePVuGr935/5ddU9Z3TmDRY= github.com/btcsuite/winsvc v1.0.0/go.mod h1:jsenWakMcC0zFBFurPLEAyrnc/teJEM1O46fmI40EZs= github.com/buger/jsonparser v0.0.0-20181115193947-bf1c66bbce23/go.mod h1:bbYlZJ7hK1yFx9hf58LP0zeX7UjIGs20ufpu3evjr+s= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -172,6 +174,8 @@ github.com/gorilla/websocket v1.5.3 
h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I= github.com/gxed/hashland/keccakpg v0.0.1/go.mod h1:kRzw3HkwxFU1mpmPP8v1WyQzwdGfmKFJ6tItnhQ67kU= github.com/gxed/hashland/murmur3 v0.0.1/go.mod h1:KjXop02n4/ckmZSnY2+HKcLud/tcmvhST0bie/0lS48= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -187,8 +191,8 @@ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpO github.com/huin/goupnp v1.3.0 h1:UvLUlWDNpoUdYzb2TCn+MuTWtcjXKSza2n6CBdQ0xXc= github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/ipfs/boxo v0.24.1 h1:Y1n+8Q9lqeWLhEMZR2staJDnY80mtSWNR+hOhG3VtDo= -github.com/ipfs/boxo v0.24.1/go.mod h1:8mVcuQl2qqprmSOXHeqvvP4TezOobM92fKF3+ugpE58= +github.com/ipfs/boxo v0.24.4-0.20241119003055-e38f236348d6 h1:/gzUlhK7vaYZEMRgy8fWDmaQ3ZHbZa4BgGjutv4JIDY= +github.com/ipfs/boxo v0.24.4-0.20241119003055-e38f236348d6/go.mod h1:Kxk43F+avGAsJSwhJW4isNYrpGwXHRJCvJ19Pt+MQc4= github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM= github.com/ipfs/go-cid v0.0.3/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= @@ -275,8 +279,8 @@ github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl9 github.com/libp2p/go-libp2p-asn-util 
v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8= github.com/libp2p/go-libp2p-core v0.2.4/go.mod h1:STh4fdfa5vDYr0/SzYYeqnt+E6KfEV5VxfIrm0bcI0g= github.com/libp2p/go-libp2p-core v0.3.0/go.mod h1:ACp3DmS3/N64c2jDzcV429ukDpicbL6+TrrxANBjPGw= -github.com/libp2p/go-libp2p-kad-dht v0.27.0 h1:1Ea32tVTPiAfaLpPMbaBWFJgbsi/JpMqC2YBuFdf32o= -github.com/libp2p/go-libp2p-kad-dht v0.27.0/go.mod h1:ixhjLuzaXSGtWsKsXTj7erySNuVC4UP7NO015cRrF14= +github.com/libp2p/go-libp2p-kad-dht v0.28.1 h1:DVTfzG8Ybn88g9RycIq47evWCRss5f0Wm8iWtpwyHso= +github.com/libp2p/go-libp2p-kad-dht v0.28.1/go.mod h1:0wHURlSFdAC42+wF7GEmpLoARw8JuS8do2guCtc/Y/w= github.com/libp2p/go-libp2p-kbucket v0.3.1/go.mod h1:oyjT5O7tS9CQurok++ERgc46YLwEpuGoFq9ubvoUOio= github.com/libp2p/go-libp2p-kbucket v0.6.4 h1:OjfiYxU42TKQSB8t8WYd8MKhYhMJeO2If+NiuKfb6iQ= github.com/libp2p/go-libp2p-kbucket v0.6.4/go.mod h1:jp6w82sczYaBsAypt5ayACcRJi0lgsba7o4TzJKEfWA= @@ -391,11 +395,14 @@ github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFSt github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs= github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= +github.com/openzipkin/zipkin-go v0.4.3 h1:9EGwpqkgnwdEIJ+Od7QVSEIH+ocmm5nPat0G7sjsSdg= +github.com/openzipkin/zipkin-go v0.4.3/go.mod h1:M9wCJZFWCo2RiY+o1eBCEMe0Dp2S5LDHcMZmk3RmK7c= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= 
+github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pion/datachannel v1.5.9 h1:LpIWAOYPyDrXtU+BW7X0Yt/vGtYxtXQ8ql7dFfYUVZA= github.com/pion/datachannel v1.5.9/go.mod h1:kDUuk4CU4Uxp82NH4LQZbISULkX/HtzKa4P7ldf9izE= github.com/pion/dtls/v2 v2.2.7/go.mod h1:8WiMkebSHFD0T+dIU+UeBaoV7kDhOW5oDCzZ7WZ/F9s= @@ -467,16 +474,16 @@ github.com/quic-go/webtransport-go v0.8.1-0.20241018022711-4ac2c9250e66/go.mod h github.com/raulk/go-watchdog v1.3.0 h1:oUmdlHxdkXRJlwfG0O9omj8ukerm8MEQavSiDTEtBsk= github.com/raulk/go-watchdog v1.3.0/go.mod h1:fIvOnLbF0b0ZwkB9YU4mOW9Did//4vPZtDqv66NfsMU= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/cors v1.11.0 h1:0B9GE/r9Bc2UxRMMtymBkHTenPkHDv0CW4Y98GBY+po= github.com/rs/cors v1.11.0/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/samber/lo v1.46.0 h1:w8G+oaCPgz1PoCJztqymCFaKwXt+5cCXn51uPxExFfQ= -github.com/samber/lo v1.46.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= +github.com/samber/lo v1.47.0 h1:z7RynLwP5nbyRscyvcD043DWYoOcYRv3mV8lBeqOCLc= +github.com/samber/lo v1.47.0/go.mod 
h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/shurcooL/component v0.0.0-20170202220835-f88ec8f54cc4/go.mod h1:XhFIlyj5a1fBNx5aJTbKoIq0mNaPvOagO+HjB3EtxrY= github.com/shurcooL/events v0.0.0-20181021180414-410e4ca65f48/go.mod h1:5u70Mqkb5O5cxEA8nxTsgrgLehJeAw6Oc4Ab1c/P1HM= @@ -573,12 +580,38 @@ go.opencensus.io v0.22.1/go.mod h1:Ap50jQcDJrx6rB6VgeeFPtuPIf3wMRvRfrfYDO6+BmA= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= -go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= -go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= -go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= -go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= -go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= +go.opentelemetry.io/contrib/propagators/autoprop v0.57.0 h1:bNPJOdT5154XxzeFmrh8R+PXnV4t3TZEczy8gHEpcpg= +go.opentelemetry.io/contrib/propagators/autoprop v0.57.0/go.mod h1:Tb0j0mK+QatKdCxCKPN7CSzc7kx/q34/KaohJx/N96s= +go.opentelemetry.io/contrib/propagators/aws v1.32.0 h1:NELzr8bW7a7aHVZj5gaep1PfkvoSCGx+1qNGZx/uhhU= +go.opentelemetry.io/contrib/propagators/aws v1.32.0/go.mod h1:XKMrzHNka3eOA+nGEcNKYVL9s77TAhkwQEynYuaRFnQ= +go.opentelemetry.io/contrib/propagators/b3 v1.32.0 
h1:MazJBz2Zf6HTN/nK/s3Ru1qme+VhWU5hm83QxEP+dvw= +go.opentelemetry.io/contrib/propagators/b3 v1.32.0/go.mod h1:B0s70QHYPrJwPOwD1o3V/R8vETNOG9N3qZf4LDYvA30= +go.opentelemetry.io/contrib/propagators/jaeger v1.32.0 h1:K/fOyTMD6GELKTIJBaJ9k3ppF2Njt8MeUGBOwfaWXXA= +go.opentelemetry.io/contrib/propagators/jaeger v1.32.0/go.mod h1:ISE6hda//MTWvtngG7p4et3OCngsrTVfl7c6DjN17f8= +go.opentelemetry.io/contrib/propagators/ot v1.32.0 h1:Poy02A4wOZubHyd2hpHPDgZW+rn6EIq0vCwTZJ6Lmu8= +go.opentelemetry.io/contrib/propagators/ot v1.32.0/go.mod h1:cbhaURV+VR3NIMarzDYZU1RDEkXG1fNd1WMP1XCcGkY= +go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= +go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 h1:K0XaT3DwHAcV4nKLzcQvwAgSyisUghWoY20I7huthMk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0/go.mod h1:B5Ki776z/MBnVha1Nzwp5arlzBbE3+1jk+pGmaP5HME= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0 h1:FFeLy03iVTXP6ffeN2iXrxfGsZGCjVx0/4KlizjyBwU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0/go.mod h1:TMu73/k1CP8nBUpDLc71Wj/Kf7ZS9FK5b53VapRsP9o= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 h1:lUsI2TYsQw2r1IASwoROaCnjdj2cvC2+Jbxvk6nHnWU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0/go.mod h1:2HpZxxQurfGxJlJDblybejHB6RX6pmExPNe517hREw4= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.31.0 h1:UGZ1QwZWY67Z6BmckTU+9Rxn04m2bD3gD6Mk0OIOCPk= +go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.31.0/go.mod h1:fcwWuDuaObkkChiDlhEpSq9+X1C0omv+s5mBtToAQ64= +go.opentelemetry.io/otel/exporters/zipkin v1.31.0 h1:CgucL0tj3717DJnni7HVVB2wExzi8c2zJNEA2BhLMvI= +go.opentelemetry.io/otel/exporters/zipkin v1.31.0/go.mod h1:rfzOVNiSwIcWtEC2J8epwG26fiaXlYvLySJ7bwsrtAE= +go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= 
+go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= +go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= +go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= +go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= +go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/dig v1.18.0 h1:imUL1UiY0Mg4bqbFfsRQO5G4CGRBec/ZujWTvSVp3pw= @@ -712,8 +745,8 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -780,6 +813,10 @@ google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod 
h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= @@ -789,6 +826,8 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= diff --git a/main.go b/main.go index e8e00c0..8b0fc55 100644 --- 
a/main.go +++ b/main.go @@ -100,6 +100,18 @@ func main() { EnvVars: []string{"SOMEGUY_LIBP2P_MAX_FD"}, Usage: "maximum number of file descriptors used by libp2p node. Defaults to 50% of the process' limit", }, + &cli.StringFlag{ + Name: "tracing-auth", + Value: "", + EnvVars: []string{"SOMEGUY_TRACING_AUTH"}, + Usage: "If set the key gates use of the Traceparent header by requiring the key to be passed in the Authorization header", + }, + &cli.Float64Flag{ + Name: "sampling-fraction", + Value: 0, + EnvVars: []string{"SOMEGUY_SAMPLING_FRACTION"}, + Usage: "Rate at which to sample gateway requests. Does not include requests with traceheaders which will always sample", + }, }, Action: func(ctx *cli.Context) error { cfg := &config{ @@ -116,6 +128,9 @@ func main() { connMgrGrace: ctx.Duration("libp2p-connmgr-grace"), maxMemory: ctx.Uint64("libp2p-max-memory"), maxFD: ctx.Int("libp2p-max-fd"), + + tracingAuth: ctx.String("tracing-auth"), + samplingFraction: ctx.Float64("sampling-fraction"), } fmt.Printf("Starting %s %s\n", name, version) diff --git a/server.go b/server.go index f7c955b..2a50ec8 100644 --- a/server.go +++ b/server.go @@ -24,11 +24,10 @@ import ( "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/routing" "github.com/libp2p/go-libp2p/p2p/net/connmgr" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/rs/cors" - metrics "github.com/slok/go-http-metrics/metrics/prometheus" - "github.com/slok/go-http-metrics/middleware" - middlewarestd "github.com/slok/go-http-metrics/middleware/std" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) var logger = logging.Logger(name) @@ -36,7 +35,7 @@ var logger = logging.Logger(name) func withRequestLogger(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { m := httpsnoop.CaptureMetrics(next, w, r) - logger.Debugw(r.Method, "url", r.URL, "host", r.Host, "code", 
m.Code, "duration", m.Duration, "written", m.Written, "ua", r.UserAgent(), "referer", r.Referer()) + logger.Debugw(r.Method, "url", r.URL, "host", r.Host, "code", m.Code, "duration", m.Duration, "written", m.Written, "accept", r.Header.Get("Accept"), "ua", r.UserAgent(), "referer", r.Referer()) }) } @@ -54,6 +53,9 @@ type config struct { connMgrGrace time.Duration maxMemory uint64 maxFD int + + tracingAuth string + samplingFraction float64 } func start(ctx context.Context, cfg *config) error { @@ -98,15 +100,20 @@ func start(ctx context.Context, cfg *config) error { return err } - mdlw := middleware.New(middleware.Config{ - Recorder: metrics.NewRecorder(metrics.Config{Prefix: "someguy"}), - }) + tp, err := setupTracing(ctx, cfg.samplingFraction) + if err != nil { + return err + } + + defer func() { + _ = tp.Shutdown(ctx) + }() handler := server.Handler(&composableRouter{ providers: crRouters, peers: prRouters, ipns: ipnsRouters, - }) + }, server.WithPrometheusRegistry(prometheus.DefaultRegisterer)) // Add CORS. handler = cors.New(cors.Options{ @@ -122,24 +129,26 @@ func start(ctx context.Context, cfg *config) error { } handler = compress(handler) - // Add metrics. - handler = middlewarestd.Handler("/", mdlw, handler) - // Add request logging. 
handler = withRequestLogger(handler) + // Add request tracing + handler = withTracingAndDebug(handler, cfg.tracingAuth) + + http.Handle("/", handler) + http.Handle("/debug/metrics/prometheus", promhttp.Handler()) http.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) { fmt.Fprintf(w, "Client: %s\n", name) fmt.Fprintf(w, "Version: %s\n", version) }) - http.Handle("/", handler) server := &http.Server{Addr: cfg.listenAddress, Handler: nil} quit := make(chan os.Signal, 3) var wg sync.WaitGroup wg.Add(1) + fmt.Printf("Metrics endpoint: http://127.0.0.1:%s/debug/metrics/prometheus\n", port) fmt.Printf("Delegated Routing API on http://127.0.0.1:%s/routing/v1\n", port) go func() { @@ -231,3 +240,22 @@ func getCombinedRouting(endpoints []string, dht routing.Routing) (router, error) routers: append(routers, libp2pRouter{routing: dht}), }}, nil } + +func withTracingAndDebug(next http.Handler, authToken string) http.Handler { + next = otelhttp.NewHandler(next, "someguy.request") + + // Remove tracing and cache skipping headers if not authorized + return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { + // Disable tracing/debug headers if auth token missing or invalid + if authToken == "" || request.Header.Get("Authorization") != authToken { + if request.Header.Get("Traceparent") != "" { + request.Header.Del("Traceparent") + } + if request.Header.Get("Tracestate") != "" { + request.Header.Del("Tracestate") + } + } + + next.ServeHTTP(writer, request) + }) +} diff --git a/tracing.go b/tracing.go new file mode 100644 index 0000000..60c98cc --- /dev/null +++ b/tracing.go @@ -0,0 +1,114 @@ +package main + +import ( + "context" + "strings" + + "github.com/ipfs/boxo/tracing" + "go.opentelemetry.io/contrib/propagators/autoprop" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + traceapi "go.opentelemetry.io/otel/trace" +) + +// 
SetupTracing sets up the tracing based on the OTEL_* environment variables, +// It returns a trace.TracerProvider. +func setupTracing(ctx context.Context, traceFraction float64) (*trace.TracerProvider, error) { + tp, err := NewTracerProvider(ctx, traceFraction) + if err != nil { + return nil, err + } + + // Sets the default trace provider for this process. If this is not done, tracing + // will not be enabled. Please note that this will apply to the entire process + // as it is set as the default tracer, as per OTel recommendations. + otel.SetTracerProvider(tp) + + // Configures the default propagators used by the Open Telemetry library. By + // using autoprop.NewTextMapPropagator, we ensure the value of the environmental + // variable OTEL_PROPAGATORS is respected, if set. By default, Trace Context + // and Baggage are used. More details on: + // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/sdk-environment-variables.md + otel.SetTextMapPropagator(autoprop.NewTextMapPropagator()) + + return tp, nil +} + +// NewTracerProvider creates and configures a TracerProvider. 
+func NewTracerProvider(ctx context.Context, traceFraction float64) (*trace.TracerProvider, error) { + exporters, err := tracing.NewSpanExporters(ctx) + if err != nil { + return nil, err + } + + options := []trace.TracerProviderOption{} + + for _, exporter := range exporters { + options = append(options, trace.WithBatcher(exporter)) + } + + r, err := resource.Merge( + resource.Default(), + resource.NewSchemaless( + semconv.ServiceNameKey.String(name), + semconv.ServiceVersionKey.String(version), + ), + ) + if err != nil { + return nil, err + } + + var baseSampler trace.Sampler + if traceFraction == 0 { + baseSampler = trace.NeverSample() + } else { + baseSampler = trace.TraceIDRatioBased(traceFraction) + } + + // Sample all children whose parents are sampled + // Probabilistically sample if the span is a root which is a Gateway request + sampler := trace.ParentBased( + CascadingSamplerFunc(func(parameters trace.SamplingParameters) bool { + return !traceapi.SpanContextFromContext(parameters.ParentContext).IsValid() + }, "root sampler", + CascadingSamplerFunc(func(parameters trace.SamplingParameters) bool { + return strings.HasPrefix(parameters.Name, "someguy") + }, "someguy request sampler", + baseSampler))) + + options = append(options, trace.WithResource(r), trace.WithSampler(sampler)) + return trace.NewTracerProvider(options...), nil +} + +// CascadingSamplerFunc will sample with the next tracer if the condition is met, otherwise the sample will be dropped +func CascadingSamplerFunc(shouldSample func(parameters trace.SamplingParameters) bool, description string, next trace.Sampler) trace.Sampler { + return funcSampler{ + next: next, + fn: func(parameters trace.SamplingParameters) trace.SamplingResult { + if shouldSample(parameters) { + return next.ShouldSample(parameters) + } + return trace.SamplingResult{ + Decision: trace.Drop, + Tracestate: traceapi.SpanContextFromContext(parameters.ParentContext).TraceState(), + } + }, + description: description, + } +} + 
+type funcSampler struct { + next trace.Sampler + fn func(trace.SamplingParameters) trace.SamplingResult + description string +} + +func (f funcSampler) ShouldSample(parameters trace.SamplingParameters) trace.SamplingResult { + return f.fn(parameters) +} + +func (f funcSampler) Description() string { + return f.description +}