Skip to content

Commit

Permalink
Add storage metrics to OTEL, metrics by span service name
Browse files Browse the repository at this point in the history
Signed-off-by: Pavol Loffay <ploffay@redhat.com>
  • Loading branch information
pavolloffay committed Sep 2, 2020
1 parent e558711 commit 576110f
Show file tree
Hide file tree
Showing 16 changed files with 342 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
// new creates Elasticsearch exporter/storage.
func new(ctx context.Context, config *Config, params component.ExporterCreateParams) (component.TraceExporter, error) {
esCfg := config.GetPrimary()
w, err := newEsSpanWriter(*esCfg, params.Logger)
w, err := newEsSpanWriter(*esCfg, params.Logger, config.Name())
if err != nil {
return nil, err
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func (s *IntegrationTest) initSpanstore(allTagsAsFields bool) error {
AllAsFields: allTagsAsFields,
},
}
w, err := newEsSpanWriter(cfg, s.logger)
w, err := newEsSpanWriter(cfg, s.logger, "elasticsearch")
if err != nil {
return err
}
Expand Down
33 changes: 28 additions & 5 deletions cmd/opentelemetry/app/exporter/elasticsearchexporter/spanstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@ import (
"strings"
"time"

"go.opencensus.io/stats"
"go.opencensus.io/tag"
"go.opentelemetry.io/collector/component/componenterror"
"go.opentelemetry.io/collector/consumer/consumererror"
"go.opentelemetry.io/collector/consumer/pdata"
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/cmd/opentelemetry/app/exporter/elasticsearchexporter/esmodeltranslator"
"github.com/jaegertracing/jaeger/cmd/opentelemetry/app/exporter/storagemetrics"
"github.com/jaegertracing/jaeger/cmd/opentelemetry/app/internal/esclient"
"github.com/jaegertracing/jaeger/model"
"github.com/jaegertracing/jaeger/pkg/cache"
Expand All @@ -47,6 +50,7 @@ const (
// esSpanWriter holds components required for ES span writer
type esSpanWriter struct {
logger *zap.Logger
nameTag tag.Mutator
client esclient.ElasticsearchClient
serviceCache cache.Cache
spanIndexName indexNameProvider
Expand All @@ -55,7 +59,7 @@ type esSpanWriter struct {
}

// newEsSpanWriter creates a new instance of esSpanWriter.
func newEsSpanWriter(params config.Configuration, logger *zap.Logger) (*esSpanWriter, error) {
func newEsSpanWriter(params config.Configuration, logger *zap.Logger, name string) (*esSpanWriter, error) {
client, err := esclient.NewElasticsearchClient(params, logger)
if err != nil {
return nil, err
Expand All @@ -65,6 +69,8 @@ func newEsSpanWriter(params config.Configuration, logger *zap.Logger) (*esSpanWr
return nil, err
}
return &esSpanWriter{
logger: logger,
nameTag: tag.Insert(storagemetrics.TagExporterName(), name),
client: client,
spanIndexName: newIndexNameProvider(spanIndexBaseName, params.IndexPrefix, params.UseReadWriteAliases),
serviceIndexName: newIndexNameProvider(serviceIndexBaseName, params.IndexPrefix, params.UseReadWriteAliases),
Expand Down Expand Up @@ -159,14 +165,17 @@ func (w *esSpanWriter) writeSpans(ctx context.Context, spans []*dbmodel.Span) (i
errs = append(errs, err)
return len(spans), componenterror.CombineErrors(errs)
}
droppedFromResponse := w.handleResponse(res, bulkOperations)
droppedFromResponse := w.handleResponse(ctx, res, bulkOperations)
dropped += droppedFromResponse
return dropped, componenterror.CombineErrors(errs)
}

func (w *esSpanWriter) handleResponse(blk *esclient.BulkResponse, operationToSpan []bulkItem) int {
func (w *esSpanWriter) handleResponse(ctx context.Context, blk *esclient.BulkResponse, operationToSpan []bulkItem) int {
numErrors := 0
storedSpans := map[string]int64{}
notStoredSpans := map[string]int64{}
for i, d := range blk.Items {
bulkOp := operationToSpan[i]
if d.Index.Status > 201 {
numErrors++
w.logger.Error("Part of the bulk request failed",
Expand All @@ -177,15 +186,29 @@ func (w *esSpanWriter) handleResponse(blk *esclient.BulkResponse, operationToSpa
zap.String("error.cause.reason", d.Index.Error.Cause.Reason))
// TODO return an error or a struct that indicates which spans should be retried
// https://github.com/open-telemetry/opentelemetry-collector/issues/990
if !bulkOp.isService {
notStoredSpans[bulkOp.span.Process.ServiceName] = notStoredSpans[bulkOp.span.Process.ServiceName] + 1
}
} else {
// passed
bulkOp := operationToSpan[i]
if bulkOp.isService {
if !bulkOp.isService {
storedSpans[bulkOp.span.Process.ServiceName] = storedSpans[bulkOp.span.Process.ServiceName] + 1
} else {
cacheKey := hashCode(bulkOp.span.Process.ServiceName, bulkOp.span.OperationName)
w.serviceCache.Put(cacheKey, cacheKey)
}
}
}
for k, v := range notStoredSpans {
ctx, _ := tag.New(ctx,
tag.Insert(storagemetrics.TagServiceName(), k), w.nameTag)
stats.Record(ctx, storagemetrics.StatSpansNotStoredCount().M(v))
}
for k, v := range storedSpans {
ctx, _ := tag.New(ctx,
tag.Insert(storagemetrics.TagServiceName(), k), w.nameTag)
stats.Record(ctx, storagemetrics.StatSpansStoredCount().M(v))
}
return numErrors
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (c) 2020 The Jaeger Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package elasticsearchexporter

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opencensus.io/stats/view"
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/cmd/opentelemetry/app/exporter/storagemetrics"
"github.com/jaegertracing/jaeger/cmd/opentelemetry/app/internal/esclient"
"github.com/jaegertracing/jaeger/pkg/es/config"
"github.com/jaegertracing/jaeger/plugin/storage/es/spanstore/dbmodel"
)

// TestMetrics verifies that handleResponse reports the number of failed bulk
// items and records the stored / not-stored span metrics. Only span items are
// expected to contribute to the metrics; service items are not counted.
func TestMetrics(t *testing.T) {
	w, err := newEsSpanWriter(config.Configuration{Servers: []string{"localhost:9200"}, Version: 6}, zap.NewNop(), "elasticsearch")
	require.NoError(t, err)

	// Response items are matched to bulk operations by index, so the fixture
	// below pairs up as: service/ok, service/failed, span/ok, span/failed.
	response := &esclient.BulkResponse{}
	response.Items = []esclient.BulkResponseItem{
		{Index: esclient.BulkIndexResponse{Status: 200}},
		{Index: esclient.BulkIndexResponse{Status: 500}},
		{Index: esclient.BulkIndexResponse{Status: 200}},
		{Index: esclient.BulkIndexResponse{Status: 500}},
	}
	blkItms := []bulkItem{
		{isService: true, span: &dbmodel.Span{}},
		{isService: true, span: &dbmodel.Span{}},
		{span: &dbmodel.Span{Process: dbmodel.Process{ServiceName: "foo"}}},
		{span: &dbmodel.Span{Process: dbmodel.Process{ServiceName: "foo"}}},
	}

	views := storagemetrics.MetricViews()
	require.NoError(t, view.Register(views...))
	defer view.Unregister(views...)

	// Both failed items (service and span) are counted as errors.
	errs := w.handleResponse(context.Background(), response, blkItms)
	assert.Equal(t, 2, errs)

	// sumValue returns the value of the single row recorded for the named
	// view. It fails the test (instead of panicking) when the row count or the
	// aggregation type is not what the test expects.
	sumValue := func(viewName string) float64 {
		t.Helper()
		rows, err := view.RetrieveData(viewName)
		require.NoError(t, err)
		require.Len(t, rows, 1)
		sum, ok := rows[0].Data.(*view.SumData)
		require.True(t, ok, "view %q: expected *view.SumData, got %T", viewName, rows[0].Data)
		return sum.Value
	}

	// One span item succeeded and one failed; the service items are ignored.
	assert.Equal(t, float64(1), sumValue(storagemetrics.StatSpansStoredCount().Name()))
	assert.Equal(t, float64(1), sumValue(storagemetrics.StatSpansNotStoredCount().Name()))
}
30 changes: 21 additions & 9 deletions cmd/opentelemetry/app/exporter/span_writer_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ package exporter

import (
"context"
"io"

"go.opencensus.io/stats"
"go.opencensus.io/tag"
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/component/componenterror"
"go.opentelemetry.io/collector/config/configmodels"
Expand All @@ -26,6 +27,7 @@ import (
"go.opentelemetry.io/collector/exporter/exporterhelper"
jaegertranslator "go.opentelemetry.io/collector/translator/trace/jaeger"

"github.com/jaegertracing/jaeger/cmd/opentelemetry/app/exporter/storagemetrics"
"github.com/jaegertracing/jaeger/storage"
"github.com/jaegertracing/jaeger/storage/spanstore"
)
Expand All @@ -36,21 +38,16 @@ func NewSpanWriterExporter(config configmodels.Exporter, factory storage.Factory
if err != nil {
return nil, err
}
storage := store{Writer: spanWriter}
opts = append(opts, exporterhelper.WithShutdown(func(ctx context.Context) error {
if closer, ok := spanWriter.(io.Closer); ok {
return closer.Close()
}
return nil
}))
storage := store{Writer: spanWriter, storageNameTag: tag.Insert(storagemetrics.TagExporterName(), config.Name())}
return exporterhelper.NewTraceExporter(
config,
storage.traceDataPusher,
opts...)
}

type store struct {
Writer spanstore.Writer
Writer spanstore.Writer
storageNameTag tag.Mutator
}

// traceDataPusher implements OTEL exporterhelper.traceDataPusher
Expand All @@ -61,15 +58,30 @@ func (s *store) traceDataPusher(ctx context.Context, td pdata.Traces) (droppedSp
}
dropped := 0
var errs []error
storedSpans := map[string]int64{}
notStoredSpans := map[string]int64{}
for _, batch := range batches {
for _, span := range batch.Spans {
span.Process = batch.Process
err := s.Writer.WriteSpan(ctx, span)
if err != nil {
errs = append(errs, err)
dropped++
notStoredSpans[span.Process.ServiceName] = notStoredSpans[span.Process.ServiceName] + 1
} else {
storedSpans[span.Process.ServiceName] = storedSpans[span.Process.ServiceName] + 1
}
}
}
for k, v := range notStoredSpans {
ctx, _ := tag.New(ctx,
tag.Insert(storagemetrics.TagServiceName(), k), s.storageNameTag)
stats.Record(ctx, storagemetrics.StatSpansNotStoredCount().M(v))
}
for k, v := range storedSpans {
ctx, _ := tag.New(ctx,
tag.Insert(storagemetrics.TagServiceName(), k), s.storageNameTag)
stats.Record(ctx, storagemetrics.StatSpansStoredCount().M(v))
}
return dropped, componenterror.CombineErrors(errs)
}
61 changes: 44 additions & 17 deletions cmd/opentelemetry/app/exporter/span_writer_exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/uber/jaeger-lib/metrics"
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"
"go.opentelemetry.io/collector/config/configmodels"
"go.opentelemetry.io/collector/consumer/pdata"
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/cmd/opentelemetry/app/exporter/storagemetrics"
"github.com/jaegertracing/jaeger/model"
"github.com/jaegertracing/jaeger/storage/dependencystore"
"github.com/jaegertracing/jaeger/storage/spanstore"
Expand Down Expand Up @@ -74,49 +77,73 @@ func TestStore(t *testing.T) {
traceID := []byte("0123456789abcdef")
spanID := []byte("01234567")
tests := []struct {
storage store
data pdata.Traces
err string
dropped int
caption string
storage store
data pdata.Traces
err string
dropped int
caption string
metricStored float64
metricNotStored float64
}{
{
caption: "nothing to store",
storage: store{Writer: spanWriter{}},
storage: store{Writer: spanWriter{}, storageNameTag: tag.Insert(storagemetrics.TagExporterName(), "memory")},
data: traces(),
dropped: 0,
},
{
caption: "wrong data",
storage: store{Writer: spanWriter{}},
storage: store{Writer: spanWriter{}, storageNameTag: tag.Insert(storagemetrics.TagExporterName(), "memory")},
data: AddSpan(traces(), "", nil, nil),
err: "TraceID is nil",
dropped: 1,
},
{
caption: "one error in writer",
storage: store{Writer: spanWriter{err: errors.New("could not store")}},
data: AddSpan(AddSpan(traces(), "error", traceID, spanID), "", traceID, spanID),
dropped: 1,
err: "could not store",
caption: "one error in writer",
storage: store{Writer: spanWriter{err: errors.New("could not store")}, storageNameTag: tag.Insert(storagemetrics.TagExporterName(), "memory")},
data: AddSpan(AddSpan(traces(), "error", traceID, spanID), "", traceID, spanID),
dropped: 1,
err: "could not store",
metricNotStored: 1,
metricStored: 1,
},
{
caption: "two errors in writer",
storage: store{Writer: spanWriter{err: errors.New("could not store")}},
data: AddSpan(AddSpan(traces(), "error", traceID, spanID), "error", traceID, spanID),
dropped: 2,
err: "[could not store; could not store]",
caption: "two errors in writer",
storage: store{Writer: spanWriter{err: errors.New("could not store")}, storageNameTag: tag.Insert(storagemetrics.TagExporterName(), "memory")},
data: AddSpan(AddSpan(traces(), "error", traceID, spanID), "error", traceID, spanID),
dropped: 2,
err: "[could not store; could not store]",
metricNotStored: 2,
},
}
for _, test := range tests {
t.Run(test.caption, func(t *testing.T) {
views := storagemetrics.MetricViews()
require.NoError(t, view.Register(views...))
defer view.Unregister(views...)

dropped, err := test.storage.traceDataPusher(context.Background(), test.data)
assert.Equal(t, test.dropped, dropped)
if test.err != "" {
assert.Contains(t, err.Error(), test.err)
} else {
require.NoError(t, err)
}

if test.metricStored > 0 {
viewData, err := view.RetrieveData(storagemetrics.StatSpansStoredCount().Name())
require.NoError(t, err)
require.Equal(t, 1, len(viewData))
distData := viewData[0].Data.(*view.SumData)
assert.Equal(t, test.metricStored, distData.Value)
}
if test.metricNotStored > 0 {
viewData, err := view.RetrieveData(storagemetrics.StatSpansNotStoredCount().Name())
require.NoError(t, err)
require.Equal(t, 1, len(viewData))
distData := viewData[0].Data.(*view.SumData)
assert.Equal(t, test.metricNotStored, distData.Value)
}
})
}
}
Expand Down
Loading

0 comments on commit 576110f

Please sign in to comment.