Skip to content

Commit

Permalink
Add booster-bitswap request and response count metrics to dashboard (#…
Browse files Browse the repository at this point in the history
…838)

* feat: add bitswap request and response count metrics to dashboard

* add configurable metrics http port; move http metrics server to main away from bitswap server

Co-authored-by: Anton Evangelatov <anton.evangelatov@gmail.com>
  • Loading branch information
kylehuntsman and nonsense authored Oct 6, 2022
1 parent 624e14d commit a240606
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 1 deletion.
17 changes: 17 additions & 0 deletions cmd/booster-bitswap/remoteblockstore/remoteblockstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ import (
"fmt"
"strings"

"github.com/filecoin-project/boost/metrics"
"github.com/filecoin-project/boost/tracing"
blocks "github.com/ipfs/go-block-format"
"github.com/ipfs/go-cid"
blockstore "github.com/ipfs/go-ipfs-blockstore"
format "github.com/ipfs/go-ipld-format"
logging "github.com/ipfs/go-log/v2"
"go.opencensus.io/stats"
"go.opentelemetry.io/otel/attribute"
)

Expand Down Expand Up @@ -40,37 +42,52 @@ func (ro *RemoteBlockstore) Get(ctx context.Context, c cid.Cid) (b blocks.Block,
ctx, span := tracing.Tracer.Start(ctx, "rbls.get")
defer span.End()
span.SetAttributes(attribute.String("cid", c.String()))
stats.Record(ctx, metrics.BitswapRblsGetRequestCount.M(1))

log.Debugw("Get", "cid", c)
data, err := ro.api.BlockstoreGet(ctx, c)
err = normalizeError(err)
log.Debugw("Get response", "cid", c, "error", err)
if err != nil {
stats.Record(ctx, metrics.BitswapRblsGetFailResponseCount.M(1))
return nil, err
}
stats.Record(ctx, metrics.BitswapRblsGetSuccessResponseCount.M(1))
return blocks.NewBlockWithCid(data, c)
}

// Has asks the remote blockstore API whether it holds the block for c.
//
// The call runs inside an "rbls.has" tracing span tagged with the cid. One
// request-count measurement is recorded before the API call, and exactly one
// of the fail/success response-count measurements is recorded after it,
// depending on whether the API returned an error. The API's result and error
// are returned to the caller unchanged.
func (ro *RemoteBlockstore) Has(ctx context.Context, c cid.Cid) (bool, error) {
	ctx, span := tracing.Tracer.Start(ctx, "rbls.has")
	defer span.End()
	span.SetAttributes(attribute.String("cid", c.String()))
	stats.Record(ctx, metrics.BitswapRblsHasRequestCount.M(1))

	log.Debugw("Has", "cid", c)
	found, hasErr := ro.api.BlockstoreHas(ctx, c)
	log.Debugw("Has response", "cid", c, "has", found, "error", hasErr)

	// Pick the response counter that matches the outcome, then record once.
	outcome := metrics.BitswapRblsHasSuccessResponseCount
	if hasErr != nil {
		outcome = metrics.BitswapRblsHasFailResponseCount
	}
	stats.Record(ctx, outcome.M(1))

	return found, hasErr
}

// GetSize asks the remote blockstore API for the size of the block for c.
//
// The call runs inside an "rbls.get_size" tracing span tagged with the cid.
// One request-count measurement is recorded before the API call. The error
// from the API is passed through normalizeError (defined elsewhere in this
// file) before it is logged, counted, and returned. Exactly one of the
// fail/success response-count measurements is recorded, based on the
// normalized error. The size reported by the API is returned as-is, even
// when the error is non-nil.
func (ro *RemoteBlockstore) GetSize(ctx context.Context, c cid.Cid) (int, error) {
	ctx, span := tracing.Tracer.Start(ctx, "rbls.get_size")
	defer span.End()
	span.SetAttributes(attribute.String("cid", c.String()))
	stats.Record(ctx, metrics.BitswapRblsGetSizeRequestCount.M(1))

	log.Debugw("GetSize", "cid", c)
	blockSize, sizeErr := ro.api.BlockstoreGetSize(ctx, c)
	sizeErr = normalizeError(sizeErr)
	log.Debugw("GetSize response", "cid", c, "size", blockSize, "error", sizeErr)

	// Pick the response counter that matches the outcome, then record once.
	outcome := metrics.BitswapRblsGetSizeSuccessResponseCount
	if sizeErr != nil {
		outcome = metrics.BitswapRblsGetSizeFailResponseCount
	}
	stats.Record(ctx, outcome.M(1))

	return blockSize, sizeErr
}

Expand Down
24 changes: 23 additions & 1 deletion cmd/booster-bitswap/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
cliutil "github.com/filecoin-project/boost/cli/util"
"github.com/filecoin-project/boost/cmd/booster-bitswap/blockfilter"
"github.com/filecoin-project/boost/cmd/booster-bitswap/remoteblockstore"
"github.com/filecoin-project/boost/metrics"
"github.com/filecoin-project/boost/tracing"
"github.com/filecoin-project/go-jsonrpc"
lcli "github.com/filecoin-project/lotus/cli"
Expand All @@ -28,11 +29,21 @@ var runCmd = &cli.Command{
Name: "pprof",
Usage: "run pprof web server on localhost:6070",
},
&cli.UintFlag{
Name: "pprof-port",
Usage: "the http port to serve pprof on",
Value: 6070,
},
&cli.UintFlag{
Name: "port",
Usage: "the port to listen for bitswap requests on",
Value: 8888,
},
&cli.UintFlag{
Name: "metrics-port",
Usage: "the http port to serve prometheus metrics on",
Value: 9696,
},
&cli.StringFlag{
Name: "api-boost",
Usage: "the endpoint for the boost API",
Expand All @@ -55,8 +66,9 @@ var runCmd = &cli.Command{
},
Action: func(cctx *cli.Context) error {
if cctx.Bool("pprof") {
pprofPort := cctx.Int("pprof-port")
go func() {
err := http.ListenAndServe("localhost:6070", nil)
err := http.ListenAndServe(fmt.Sprintf("localhost:%d", pprofPort), nil)
if err != nil {
log.Error(err)
}
Expand Down Expand Up @@ -119,6 +131,16 @@ var runCmd = &cli.Command{
return err
}

// Start the metrics web server
metricsPort := cctx.Int("metrics-port")
log.Infof("Starting booster-bitswap metrics web server on port %d", metricsPort)
http.Handle("/metrics", metrics.Exporter("booster_bitswap")) // metrics server
go func() {
if err := http.ListenAndServe(fmt.Sprintf("0.0.0.0:%d", metricsPort), nil); err != nil {
log.Errorf("could not start prometheus metric exporter server: %s", err)
}
}()

// Monitor for shutdown.
<-ctx.Done()

Expand Down
3 changes: 3 additions & 0 deletions docker/monitoring/prometheus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ scrape_configs:
- job_name: 'booster-http'
static_configs:
- targets: [ 'booster-http:7777' ]
- job_name: 'booster-bitswap'
static_configs:
- targets: [ 'booster-bitswap:9696' ]
- job_name: 'lotus-miner'
metrics_path: "/debug/metrics"
static_configs:
Expand Down
58 changes: 58 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,17 @@ var (
HttpPieceByCid400ResponseCount = stats.Int64("http/piece_by_cid_400_response_count", "Counter of /piece/<piece-cid> 400 responses", stats.UnitDimensionless)
HttpPieceByCid404ResponseCount = stats.Int64("http/piece_by_cid_404_response_count", "Counter of /piece/<piece-cid> 404 responses", stats.UnitDimensionless)
HttpPieceByCid500ResponseCount = stats.Int64("http/piece_by_cid_500_response_count", "Counter of /piece/<piece-cid> 500 responses", stats.UnitDimensionless)

// bitswap
BitswapRblsGetRequestCount = stats.Int64("bitswap/rbls_get_request_count", "Counter of RemoteBlockstore Get requests", stats.UnitDimensionless)
BitswapRblsGetSuccessResponseCount = stats.Int64("bitswap/rbls_get_success_response_count", "Counter of successful RemoteBlockstore Get responses", stats.UnitDimensionless)
BitswapRblsGetFailResponseCount = stats.Int64("bitswap/rbls_get_fail_response_count", "Counter of failed RemoteBlockstore Get responses", stats.UnitDimensionless)
BitswapRblsGetSizeRequestCount = stats.Int64("bitswap/rbls_getsize_request_count", "Counter of RemoteBlockstore GetSize requests", stats.UnitDimensionless)
BitswapRblsGetSizeSuccessResponseCount = stats.Int64("bitswap/rbls_getsize_success_response_count", "Counter of successful RemoteBlockstore GetSize responses", stats.UnitDimensionless)
BitswapRblsGetSizeFailResponseCount = stats.Int64("bitswap/rbls_getsize_fail_response_count", "Counter of failed RemoteBlockstore GetSize responses", stats.UnitDimensionless)
BitswapRblsHasRequestCount = stats.Int64("bitswap/rbls_has_request_count", "Counter of RemoteBlockstore Has requests", stats.UnitDimensionless)
BitswapRblsHasSuccessResponseCount = stats.Int64("bitswap/rbls_has_success_response_count", "Counter of successful RemoteBlockstore Has responses", stats.UnitDimensionless)
BitswapRblsHasFailResponseCount = stats.Int64("bitswap/rbls_has_fail_response_count", "Counter of failed RemoteBlockstore Has responses", stats.UnitDimensionless)
)

var (
Expand Down Expand Up @@ -181,6 +192,44 @@ var (
Aggregation: view.Count(),
}

// bitswap
BitswapRblsGetRequestCountView = &view.View{
Measure: BitswapRblsGetRequestCount,
Aggregation: view.Count(),
}
BitswapRblsGetSuccessResponseCountView = &view.View{
Measure: BitswapRblsGetSuccessResponseCount,
Aggregation: view.Count(),
}
BitswapRblsGetFailResponseCountView = &view.View{
Measure: BitswapRblsGetFailResponseCount,
Aggregation: view.Count(),
}
BitswapRblsGetSizeRequestCountView = &view.View{
Measure: BitswapRblsGetSizeRequestCount,
Aggregation: view.Count(),
}
BitswapRblsGetSizeSuccessResponseCountView = &view.View{
Measure: BitswapRblsGetSizeSuccessResponseCount,
Aggregation: view.Count(),
}
BitswapRblsGetSizeFailResponseCountView = &view.View{
Measure: BitswapRblsGetSizeFailResponseCount,
Aggregation: view.Count(),
}
BitswapRblsHasRequestCountView = &view.View{
Measure: BitswapRblsHasRequestCount,
Aggregation: view.Count(),
}
BitswapRblsHasSuccessResponseCountView = &view.View{
Measure: BitswapRblsHasSuccessResponseCount,
Aggregation: view.Count(),
}
BitswapRblsHasFailResponseCountView = &view.View{
Measure: BitswapRblsHasFailResponseCount,
Aggregation: view.Count(),
}

InfoView = &view.View{
Name: "info",
Description: "Lotus node information",
Expand Down Expand Up @@ -463,6 +512,15 @@ var DefaultViews = func() []*view.View {
HttpPieceByCid400ResponseCountView,
HttpPieceByCid404ResponseCountView,
HttpPieceByCid500ResponseCountView,
BitswapRblsGetRequestCountView,
BitswapRblsGetSuccessResponseCountView,
BitswapRblsGetFailResponseCountView,
BitswapRblsGetSizeRequestCountView,
BitswapRblsGetSizeSuccessResponseCountView,
BitswapRblsGetSizeFailResponseCountView,
BitswapRblsHasRequestCountView,
BitswapRblsHasSuccessResponseCountView,
BitswapRblsHasFailResponseCountView,
lotusmetrics.DagStorePRBytesDiscardedView,
lotusmetrics.DagStorePRBytesRequestedView,
lotusmetrics.DagStorePRDiscardCountView,
Expand Down

0 comments on commit a240606

Please sign in to comment.