Skip to content

Commit

Permalink
Correctly cancel GRPC context beneath the HTTP server (#3443)
Browse files Browse the repository at this point in the history
* cancel context

Signed-off-by: Joe Elliott <number101010@gmail.com>

* update dskit

Signed-off-by: Joe Elliott <number101010@gmail.com>

* focused timeouts

Signed-off-by: Joe Elliott <number101010@gmail.com>

* docs

Signed-off-by: Joe Elliott <number101010@gmail.com>

* lint N docs

Signed-off-by: Joe Elliott <number101010@gmail.com>

* more lint

Signed-off-by: Joe Elliott <number101010@gmail.com>

* make update-mod

Signed-off-by: Joe Elliott <number101010@gmail.com>

---------

Signed-off-by: Joe Elliott <number101010@gmail.com>
  • Loading branch information
joe-elliott authored Mar 12, 2024
1 parent ae083c3 commit eb81b92
Show file tree
Hide file tree
Showing 46 changed files with 4,313 additions and 124 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* [BUGFIX] Fix metrics query results when series contain empty strings or nil values [#3429](https://github.com/grafana/tempo/issues/3429) (@mdisibio)
* [BUGFIX] Return unfiltered results when a bad TraceQL query is provided in autocomplete. [#3426](https://github.com/grafana/tempo/pull/3426) (@mapno)
* [BUGFIX] Correctly handle 429s in GRPC search streaming. [#3469](https://github.com/grafana/tempo/pull/3469) (@joe-ellitot)
* [BUGFIX] Correctly cancel GRPC and HTTP contexts in the frontend to prevent having to rely on http write timeout. [#3443](https://github.com/grafana/tempo/pull/3443) (@joe-elliott)

## v2.4.0

Expand Down
4 changes: 2 additions & 2 deletions cmd/tempo-serverless/cloud-run/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ require (
github.com/googleapis/gax-go/v2 v2.12.0 // indirect
github.com/gorilla/handlers v1.5.1 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/grafana/dskit v0.0.0-20240116202611-824e75a28ee8 // indirect
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586 // indirect
github.com/grafana/dskit v0.0.0-20240311184239-73feada6c0d7 // indirect
github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 // indirect
github.com/grafana/pyroscope-go/godeltaprof v0.1.6 // indirect
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect
Expand Down
8 changes: 4 additions & 4 deletions cmd/tempo-serverless/cloud-run/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,10 @@ github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/grafana/dskit v0.0.0-20240116202611-824e75a28ee8 h1:qduBYOZAR5/RUO6yOlq1qYSw4tqeS3YeNxIHpQ4JIW8=
github.com/grafana/dskit v0.0.0-20240116202611-824e75a28ee8/go.mod h1:x5DMwyr1kyirtHOxoFSZ7RnyOgHdGh03ZruupdPetQM=
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586 h1:/of8Z8taCPftShATouOrBVy6GaTTjgQd/VfNiZp/VXQ=
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU=
github.com/grafana/dskit v0.0.0-20240311184239-73feada6c0d7 h1:yd9yoNgEOtp8O0MbtqXoMVqr+ZbU4oZFE8a04z8WXFE=
github.com/grafana/dskit v0.0.0-20240311184239-73feada6c0d7/go.mod h1:RpTvZ9nkdXqyQro5DULQHJl9B6vwvEj95Dk6WIXqTLQ=
github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 h1:X8IKQ0wu40wpvYcKfBcc5T4QnhdQjUhtUtB/1CY89lE=
github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU=
github.com/grafana/pyroscope-go/godeltaprof v0.1.6 h1:nEdZ8louGAplSvIJi1HVp7kWvFvdiiYg3COLlTwJiFo=
github.com/grafana/pyroscope-go/godeltaprof v0.1.6/go.mod h1:Tk376Nbldo4Cha9RgiU7ik8WKFkNpfds98aUzS8omLE=
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db h1:7aN5cccjIqCLTzedH7MZzRZt5/lsAHch6Z3L2ZGn5FA=
Expand Down
4 changes: 2 additions & 2 deletions cmd/tempo-serverless/lambda/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ require (
github.com/googleapis/gax-go/v2 v2.12.0 // indirect
github.com/gorilla/handlers v1.5.1 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/grafana/dskit v0.0.0-20240116202611-824e75a28ee8 // indirect
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586 // indirect
github.com/grafana/dskit v0.0.0-20240311184239-73feada6c0d7 // indirect
github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 // indirect
github.com/grafana/pyroscope-go/godeltaprof v0.1.6 // indirect
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect
Expand Down
8 changes: 4 additions & 4 deletions cmd/tempo-serverless/lambda/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,10 @@ github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/grafana/dskit v0.0.0-20240116202611-824e75a28ee8 h1:qduBYOZAR5/RUO6yOlq1qYSw4tqeS3YeNxIHpQ4JIW8=
github.com/grafana/dskit v0.0.0-20240116202611-824e75a28ee8/go.mod h1:x5DMwyr1kyirtHOxoFSZ7RnyOgHdGh03ZruupdPetQM=
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586 h1:/of8Z8taCPftShATouOrBVy6GaTTjgQd/VfNiZp/VXQ=
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU=
github.com/grafana/dskit v0.0.0-20240311184239-73feada6c0d7 h1:yd9yoNgEOtp8O0MbtqXoMVqr+ZbU4oZFE8a04z8WXFE=
github.com/grafana/dskit v0.0.0-20240311184239-73feada6c0d7/go.mod h1:RpTvZ9nkdXqyQro5DULQHJl9B6vwvEj95Dk6WIXqTLQ=
github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 h1:X8IKQ0wu40wpvYcKfBcc5T4QnhdQjUhtUtB/1CY89lE=
github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU=
github.com/grafana/pyroscope-go/godeltaprof v0.1.6 h1:nEdZ8louGAplSvIJi1HVp7kWvFvdiiYg3COLlTwJiFo=
github.com/grafana/pyroscope-go/godeltaprof v0.1.6/go.mod h1:Tk376Nbldo4Cha9RgiU7ik8WKFkNpfds98aUzS8omLE=
github.com/grafana/regexp v0.0.0-20221123153739-15dc172cd2db h1:7aN5cccjIqCLTzedH7MZzRZt5/lsAHch6Z3L2ZGn5FA=
Expand Down
22 changes: 19 additions & 3 deletions cmd/tempo/app/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/grafana/tempo/modules/compactor"
"github.com/grafana/tempo/modules/distributor"
"github.com/grafana/tempo/modules/frontend"
"github.com/grafana/tempo/modules/frontend/interceptor"
frontend_v1pb "github.com/grafana/tempo/modules/frontend/v1/frontendv1pb"
"github.com/grafana/tempo/modules/generator"
"github.com/grafana/tempo/modules/ingester"
Expand Down Expand Up @@ -106,6 +107,13 @@ func (t *App) initServer() (services.Service, error) {
return svs
}

// add unary and stream timeout interceptors for the query-frontend if configured
// this same timeout is enforced for http in the initQueryFrontend() function
if t.cfg.Frontend.APITimeout > 0 && t.isModuleActive(QueryFrontend) {
t.cfg.Server.GRPCMiddleware = append(t.cfg.Server.GRPCMiddleware, interceptor.NewFrontendAPIUnaryTimeout(t.cfg.Frontend.APITimeout))
t.cfg.Server.GRPCStreamMiddleware = append(t.cfg.Server.GRPCStreamMiddleware, interceptor.NewFrontendAPIStreamTimeout(t.cfg.Frontend.APITimeout))
}

return t.Server.StartAndReturnService(t.cfg.Server, t.cfg.StreamOverHTTPEnabled, servicesToWaitFor)
}

Expand Down Expand Up @@ -366,11 +374,19 @@ func (t *App) initQueryFrontend() (services.Service, error) {
// this GRPC service to be available on the HTTP server.
tempopb.RegisterStreamingQuerierServer(t.Server.GRPC(), queryFrontend)

// wrap handlers with auth
base := middleware.Merge(
httpAPIMiddleware := []middleware.Interface{
t.HTTPAuthMiddleware,
httpGzipMiddleware(),
)
}

// use the api timeout for http requests if set. note that this is set in initServer() for
// grpc requests
if t.cfg.Frontend.APITimeout > 0 {
httpAPIMiddleware = append(httpAPIMiddleware, middleware.NewTimeoutMiddleware(t.cfg.Frontend.APITimeout, "unable to process request in the configured timeout", t.Server.Log()))
}

// wrap handlers with auth
base := middleware.Merge(httpAPIMiddleware...)

// http trace by id endpoint
t.Server.HTTPRouter().Handle(addHTTPAPIPrefix(&t.cfg, api.PathTraces), base.Wrap(queryFrontend.TraceByIDHandler))
Expand Down
9 changes: 9 additions & 0 deletions docs/sources/tempo/configuration/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,15 @@ query_frontend:
# to both query stats and slow queries logs.
[log_query_request_headers: <string> | default = ""]

# Set a maximum timeout for all api queries at which point the frontend will cancel queued jobs
# and return cleanly. HTTP will return a 503 and GRPC will return a context canceled error.
# This timeout impacts all http and grpc streaming queries as part of the Tempo api surface such as
# search, metrics summary, tags and tag values lookups, etc.
# Generally it is preferred to let the client cancel context. This is a failsafe to prevent a client
# from imposing more work on Tempo than desired.
# (default: 0)
[api_timeout: <duration>]

search:

# The number of concurrent jobs to execute when searching the backend.
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ require (
github.com/google/go-cmp v0.6.0
github.com/google/uuid v1.6.0
github.com/gorilla/mux v1.8.1
github.com/grafana/dskit v0.0.0-20240116202611-824e75a28ee8
github.com/grafana/dskit v0.0.0-20240311184239-73feada6c0d7
github.com/grafana/e2e v0.1.1
github.com/grpc-ecosystem/grpc-opentracing v0.0.0-20180507213350-8e809c8a8645
github.com/hashicorp/go-hclog v1.6.2
Expand Down Expand Up @@ -104,7 +104,7 @@ require (
github.com/evanphx/json-patch v5.6.0+incompatible
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da
github.com/googleapis/gax-go/v2 v2.12.0
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586
github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl v0.89.0
github.com/open-telemetry/opentelemetry-collector-contrib/processor/filterprocessor v0.89.0
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -501,12 +501,12 @@ github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/grafana/dskit v0.0.0-20240116202611-824e75a28ee8 h1:qduBYOZAR5/RUO6yOlq1qYSw4tqeS3YeNxIHpQ4JIW8=
github.com/grafana/dskit v0.0.0-20240116202611-824e75a28ee8/go.mod h1:x5DMwyr1kyirtHOxoFSZ7RnyOgHdGh03ZruupdPetQM=
github.com/grafana/dskit v0.0.0-20240311184239-73feada6c0d7 h1:yd9yoNgEOtp8O0MbtqXoMVqr+ZbU4oZFE8a04z8WXFE=
github.com/grafana/dskit v0.0.0-20240311184239-73feada6c0d7/go.mod h1:RpTvZ9nkdXqyQro5DULQHJl9B6vwvEj95Dk6WIXqTLQ=
github.com/grafana/e2e v0.1.1 h1:/b6xcv5BtoBnx8cZnCiey9DbjEc8z7gXHO5edoeRYxc=
github.com/grafana/e2e v0.1.1/go.mod h1:RpNLgae5VT+BUHvPE+/zSypmOXKwEu4t+tnEMS1ATaE=
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586 h1:/of8Z8taCPftShATouOrBVy6GaTTjgQd/VfNiZp/VXQ=
github.com/grafana/gomemcache v0.0.0-20231023152154-6947259a0586/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU=
github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 h1:X8IKQ0wu40wpvYcKfBcc5T4QnhdQjUhtUtB/1CY89lE=
github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU=
github.com/grafana/memberlist v0.3.1-0.20220708130638-bd88e10a3d91 h1:/NipyHnOmvRsVzj81j2qE0VxsvsqhOB0f4vJIhk2qCQ=
github.com/grafana/memberlist v0.3.1-0.20220708130638-bd88e10a3d91/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
github.com/grafana/pyroscope-go/godeltaprof v0.1.6 h1:nEdZ8louGAplSvIJi1HVp7kWvFvdiiYg3COLlTwJiFo=
Expand Down
8 changes: 8 additions & 0 deletions modules/distributor/distributor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1510,3 +1510,11 @@ func (r mockRing) CleanupShuffleShardCache(string) {
func (r mockRing) GetInstanceState(string) (ring.InstanceState, error) {
return ring.ACTIVE, nil
}

func (r mockRing) InstancesInZoneCount(string) int {
return 0
}

func (r mockRing) ZonesCount() int {
return 0
}
7 changes: 7 additions & 0 deletions modules/frontend/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,16 @@ type Config struct {
TraceByID TraceByIDConfig `yaml:"trace_by_id"`
Metrics MetricsConfig `yaml:"metrics"`
MultiTenantQueriesEnabled bool `yaml:"multi_tenant_queries_enabled"`

// the maximum time limit that tempo will work on an api request. this includes both
// grpc and http requests and applies to all "api" frontend query endpoints such as
// traceql, tag search, tag value search, trace by id and all streaming gRPC endpoints.
// 0 disables
APITimeout time.Duration `yaml:"api_timeout,omitempty"`
}

type SearchConfig struct {
Timeout time.Duration `yaml:"timeout,omitempty"`
Sharder SearchSharderConfig `yaml:",inline"`
SLO SLOConfig `yaml:",inline"`
}
Expand Down
40 changes: 40 additions & 0 deletions modules/frontend/interceptor/interceptor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package interceptor

import (
"context"
"strings"
"time"

grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
"google.golang.org/grpc"
)

const streamingQuerierPrefix = "/tempopb.StreamingQuerier/"

func NewFrontendAPIUnaryTimeout(timeout time.Duration) grpc.UnaryServerInterceptor {
return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) {
if strings.HasPrefix(info.FullMethod, streamingQuerierPrefix) {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, timeout)
defer cancel()
}

return handler(ctx, req)
}
}

func NewFrontendAPIStreamTimeout(timeout time.Duration) grpc.StreamServerInterceptor {
return func(srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
ctx := ss.Context()
if strings.HasPrefix(info.FullMethod, streamingQuerierPrefix) {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ss.Context(), timeout)
defer cancel()
}

return handler(srv, &grpc_middleware.WrappedServerStream{
ServerStream: ss,
WrappedContext: ctx,
})
}
}
97 changes: 97 additions & 0 deletions modules/frontend/interceptor/interceptor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package interceptor

import (
"context"
"net"
"testing"
"time"

"github.com/grafana/tempo/pkg/gogocodec"
"github.com/grafana/tempo/pkg/tempopb"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/encoding"
)

func TestInterceptorsCancelContextForStreaming(t *testing.T) {
encoding.RegisterCodec(gogocodec.NewCodec())

interceptorTimeout := time.Second
apiTimeout := time.Second * 5

unaryInt := NewFrontendAPIUnaryTimeout(interceptorTimeout)
streamInt := NewFrontendAPIStreamTimeout(interceptorTimeout)

serv := grpc.NewServer(grpc.UnaryInterceptor(unaryInt), grpc.StreamInterceptor(streamInt))
defer serv.GracefulStop()

srv := &mockService{apiTimeout}
tempopb.RegisterStreamingQuerierServer(serv, srv)
tempopb.RegisterPusherServer(serv, srv)

listener, err := net.Listen("tcp", "localhost:0")
require.NoError(t, err)

go func() {
require.NoError(t, serv.Serve(listener))
}()

conn, err := grpc.Dial(listener.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(10e6), grpc.MaxCallSendMsgSize(10e6)))
require.NoError(t, err)
defer func() {
require.NoError(t, conn.Close())
}()

// test that a streaming client has its context cancelled after the interceptor timeout and before the api timeout
c := tempopb.NewStreamingQuerierClient(conn)
client, err := c.Search(context.Background(), &tempopb.SearchRequest{})
require.NoError(t, err)

start := time.Now()
_, err = client.Recv()
require.EqualError(t, err, "rpc error: code = DeadlineExceeded desc = context deadline exceeded")
require.LessOrEqual(t, time.Since(start), apiTimeout) // confirm that we didn't wait for the full api timeout

// test that the pusher client does not have its context cancelled and waits for the full api timeout
pc := tempopb.NewPusherClient(conn)

start = time.Now()
_, err = pc.PushBytesV2(context.Background(), &tempopb.PushBytesRequest{})
require.NoError(t, err)
require.GreaterOrEqual(t, time.Since(start), apiTimeout) // confirm that we did wait for the full api timeout
}

type mockService struct {
apiTimeout time.Duration
}

func (s *mockService) Search(_ *tempopb.SearchRequest, ss tempopb.StreamingQuerier_SearchServer) error {
select {
case <-time.After(s.apiTimeout):
case <-ss.Context().Done():
return ss.Context().Err()
}

return nil
}

func (s *mockService) PushBytes(ctx context.Context, _ *tempopb.PushBytesRequest) (*tempopb.PushResponse, error) {
select {
case <-time.After(s.apiTimeout):
case <-ctx.Done():
return nil, ctx.Err()
}

return &tempopb.PushResponse{}, nil
}

func (s *mockService) PushBytesV2(ctx context.Context, _ *tempopb.PushBytesRequest) (*tempopb.PushResponse, error) {
select {
case <-time.After(s.apiTimeout):
case <-ctx.Done():
return nil, ctx.Err()
}

return &tempopb.PushResponse{}, nil
}
46 changes: 46 additions & 0 deletions vendor/github.com/grafana/dskit/concurrency/runner.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit eb81b92

Please sign in to comment.