From f81ba306dd37947baf3ef572869755aa0b55e1b5 Mon Sep 17 00:00:00 2001 From: "Kim, JinSan" Date: Thu, 10 Dec 2020 14:51:35 +0900 Subject: [PATCH 1/3] feat: more prometheus metrics for monitoring performance --- mempool/clist_mempool.go | 8 +++++++- mempool/metrics.go | 17 +++++++++++++---- state/execution.go | 27 ++++++++++++++++++++++++--- state/metrics.go | 38 +++++++++++++++++++++++++++++++++++++- 4 files changed, 81 insertions(+), 9 deletions(-) diff --git a/mempool/clist_mempool.go b/mempool/clist_mempool.go index f55d62329..55e3a2327 100644 --- a/mempool/clist_mempool.go +++ b/mempool/clist_mempool.go @@ -7,6 +7,7 @@ import ( "fmt" "sync" "sync/atomic" + "time" abci "github.com/tendermint/tendermint/abci/types" cfg "github.com/tendermint/tendermint/config" @@ -305,7 +306,7 @@ func (mem *CListMempool) globalCb(req *abci.Request, res *abci.Response) { return } - mem.metrics.RecheckTimes.Add(1) + mem.metrics.RecheckCount.Add(1) mem.resCbRecheck(req, res) // update metrics @@ -599,6 +600,7 @@ func (mem *CListMempool) Update( // Either recheck non-committed txs to see if they became invalid // or just notify there're some txs left. + recheckStartTime := time.Now().UnixNano() if mem.Size() > 0 { if mem.config.Recheck { mem.logger.Info("Recheck txs", "numtxs", mem.Size(), "height", height) @@ -610,6 +612,10 @@ func (mem *CListMempool) Update( mem.notifyTxsAvailable() } } + recheckEndTime := time.Now().UnixNano() + + recheckTimeMs := float64(recheckEndTime-recheckStartTime) / 1000000 + mem.metrics.RecheckTime.Set(recheckTimeMs) // Update metrics mem.metrics.Size.Set(float64(mem.Size())) diff --git a/mempool/metrics.go b/mempool/metrics.go index 5e4eaf5ed..39459de69 100644 --- a/mempool/metrics.go +++ b/mempool/metrics.go @@ -23,7 +23,9 @@ type Metrics struct { // Number of failed transactions. FailedTxs metrics.Counter // Number of times transactions are rechecked in the mempool. - RecheckTimes metrics.Counter + RecheckCount metrics.Counter + // Time of recheck transactions in the mempool. + RecheckTime metrics.Gauge } // PrometheusMetrics returns Metrics build using Prometheus client library. @@ -54,12 +56,18 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "failed_txs", Help: "Number of failed transactions.", }, labels).With(labelsAndValues...), - RecheckTimes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + RecheckCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, - Name: "recheck_times", + Name: "recheck_count", Help: "Number of times transactions are rechecked in the mempool.", }, labels).With(labelsAndValues...), + RecheckTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "recheck_time", + Help: "Time of recheck transactions in the mempool in ms.", + }, labels).With(labelsAndValues...), } } @@ -69,6 +77,7 @@ func NopMetrics() *Metrics { Size: discard.NewGauge(), TxSizeBytes: discard.NewHistogram(), FailedTxs: discard.NewCounter(), - RecheckTimes: discard.NewCounter(), + RecheckCount: discard.NewCounter(), + RecheckTime: discard.NewGauge(), } } diff --git a/state/execution.go b/state/execution.go index cd45484ce..50a7afd89 100644 --- a/state/execution.go +++ b/state/execution.go @@ -131,10 +131,14 @@ func (blockExec *BlockExecutor) ApplyBlock( return state, 0, ErrInvalidBlock(err) } - startTime := time.Now().UnixNano() + execStartTime := time.Now().UnixNano() abciResponses, err := execBlockOnProxyApp(blockExec.logger, blockExec.proxyApp, block, blockExec.db) - endTime := time.Now().UnixNano() - blockExec.metrics.BlockProcessingTime.Observe(float64(endTime-startTime) / 1000000) + execEndTime := time.Now().UnixNano() + + execTimeMs := float64(execEndTime-execStartTime) / 1000000 + blockExec.metrics.BlockProcessingTime.Observe(execTimeMs) + blockExec.metrics.BlockExecutionTime.Set(execTimeMs) + if err != nil { return state, 0, ErrProxyAppConn(err) } @@ -167,7 +171,13 @@ func (blockExec *BlockExecutor) ApplyBlock( } // Lock mempool, commit app state, update mempoool. + commitStartTime := time.Now().UnixNano() appHash, retainHeight, err := blockExec.Commit(state, block, abciResponses.DeliverTxs) + commitEndTime := time.Now().UnixNano() + + commitTimeMs := float64(commitEndTime-commitStartTime) / 1000000 + blockExec.metrics.BlockCommitTime.Set(commitTimeMs) + if err != nil { return state, 0, fmt.Errorf("commit failed for application: %v", err) } @@ -213,7 +223,13 @@ func (blockExec *BlockExecutor) Commit( } // Commit block, get hash back + appCommitStartTime := time.Now().UnixNano() res, err := blockExec.proxyApp.CommitSync() + appCommitEndTime := time.Now().UnixNano() + + appCommitTimeMs := float64(appCommitEndTime-appCommitStartTime) / 1000000 + blockExec.metrics.BlockAppCommitTime.Set(appCommitTimeMs) + if err != nil { blockExec.logger.Error( "Client error during proxyAppConn.CommitSync", @@ -231,6 +247,7 @@ func (blockExec *BlockExecutor) Commit( ) // Update mempool. + updateMempoolStartTime := time.Now().UnixNano() err = blockExec.mempool.Update( block.Height, block.Txs, @@ -238,6 +255,10 @@ func (blockExec *BlockExecutor) Commit( TxPreCheck(state), TxPostCheck(state), ) + updateMempoolEndTime := time.Now().UnixNano() + + updateMempoolTimeMs := float64(updateMempoolEndTime-updateMempoolStartTime) / 1000000 + blockExec.metrics.BlockUpdateMempoolTime.Set(updateMempoolTimeMs) return res.Data, res.RetainHeight, err } diff --git a/state/metrics.go b/state/metrics.go index bcd713f5f..730de6169 100644 --- a/state/metrics.go +++ b/state/metrics.go @@ -17,6 +17,14 @@ const ( type Metrics struct { // Time between BeginBlock and EndBlock. BlockProcessingTime metrics.Histogram + // Time gauge between BeginBlock and EndBlock. + BlockExecutionTime metrics.Gauge + // Time of commit + BlockCommitTime metrics.Gauge + // Time of app commit + BlockAppCommitTime metrics.Gauge + // Time of update mempool + BlockUpdateMempoolTime metrics.Gauge } // PrometheusMetrics returns Metrics build using Prometheus client library. @@ -35,12 +43,40 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Help: "Time between BeginBlock and EndBlock in ms.", Buckets: stdprometheus.LinearBuckets(1, 10, 10), }, labels).With(labelsAndValues...), + BlockExecutionTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_execution_time", + Help: "Time between BeginBlock and EndBlock in ms.", + }, labels).With(labelsAndValues...), + BlockCommitTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_commit_time", + Help: "Time of commit in ms.", + }, labels).With(labelsAndValues...), + BlockAppCommitTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_app_commit_time", + Help: "Time of app commit in ms.", + }, labels).With(labelsAndValues...), + BlockUpdateMempoolTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_update_mempool_time", + Help: "Time of update mempool in ms.", + }, labels).With(labelsAndValues...), } } // NopMetrics returns no-op Metrics. func NopMetrics() *Metrics { return &Metrics{ - BlockProcessingTime: discard.NewHistogram(), + BlockProcessingTime: discard.NewHistogram(), + BlockExecutionTime: discard.NewGauge(), + BlockCommitTime: discard.NewGauge(), + BlockAppCommitTime: discard.NewGauge(), + BlockUpdateMempoolTime: discard.NewGauge(), } } From 47543310989b8972ca7659d8fbe33a016baefea2 Mon Sep 17 00:00:00 2001 From: "Kim, JinSan" Date: Thu, 10 Dec 2020 21:49:58 +0900 Subject: [PATCH 2/3] chore: touch --- touch | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 touch diff --git a/touch b/touch new file mode 100644 index 000000000..e69de29bb From 87634dccfb4dbc0d6aebedcdb57a5a7398531c67 Mon Sep 17 00:00:00 2001 From: "Kim, JinSan" Date: Thu, 10 Dec 2020 22:10:45 +0900 Subject: [PATCH 3/3] chore: remove touch --- touch | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 touch diff --git a/touch b/touch deleted file mode 100644 index e69de29bb..000000000