Skip to content

Commit

Permalink
feat: Conditionally emit metrics based on enablement (backport #19903) (
Browse files Browse the repository at this point in the history
#20017)

Co-authored-by: Lucas Francisco López <lucaslopezf@gmail.com>
Co-authored-by: Julien Robert <julien@rbrt.fr>
  • Loading branch information
3 people authored Apr 12, 2024
1 parent f76c659 commit 974a24c
Show file tree
Hide file tree
Showing 20 changed files with 116 additions and 37 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ Ref: https://keepachangelog.com/en/1.0.0/

### Improvements

* (telemetry) [#19903](https://github.com/cosmos/cosmos-sdk/pull/19903) Conditionally emit metrics based on enablement.
* **Introduction of `Now` Function**: Added a new function called `Now` to the telemetry package. It returns the current system time if telemetry is enabled, or a zero time if telemetry is not enabled.
* **Global Enablement Flag**: Added a package-level variable (`globalTelemetryEnabled`) that holds the state of telemetry's enablement. It is set when telemetry is initialized via `New` and is not expected to change for the lifetime of the program.
* **Conditional Telemetry Emission**: All telemetry functions have been updated to emit metrics only when telemetry is enabled. They perform a check with `IsTelemetryEnabled()` and return early if telemetry is disabled, minimizing unnecessary operations and overhead.
* (deps) [#19810](https://github.com/cosmos/cosmos-sdk/pull/19810) Upgrade prometheus version and fix API breaking change due to prometheus bump.
* (deps) [#19810](https://github.com/cosmos/cosmos-sdk/pull/19810) Bump `cosmossdk.io/store` to v1.1.0.
* (server) [#19884](https://github.com/cosmos/cosmos-sdk/pull/19884) Add start customizability to start command options.
Expand Down
2 changes: 1 addition & 1 deletion baseapp/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ func (app *BaseApp) Query(_ context.Context, req *abci.RequestQuery) (resp *abci

telemetry.IncrCounter(1, "query", "count")
telemetry.IncrCounter(1, "query", req.Path)
defer telemetry.MeasureSince(time.Now(), req.Path)
defer telemetry.MeasureSince(telemetry.Now(), req.Path)

if req.Path == QueryPathBroadcastTx {
return sdkerrors.QueryResult(errorsmod.Wrap(sdkerrors.ErrInvalidRequest, "can't route a broadcast tx message"), app.trace), nil
Expand Down
2 changes: 1 addition & 1 deletion client/v2/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
cosmossdk.io/api v0.7.3
cosmossdk.io/core v0.11.0
cosmossdk.io/depinject v1.0.0-alpha.4
cosmossdk.io/math v1.3.0
cosmossdk.io/x/tx v0.13.1
github.com/cockroachdb/errors v1.11.1
github.com/cosmos/cosmos-proto v1.0.0-beta.4
Expand All @@ -22,7 +23,6 @@ require (
cosmossdk.io/collections v0.4.0 // indirect
cosmossdk.io/errors v1.0.1 // indirect
cosmossdk.io/log v1.3.1 // indirect
cosmossdk.io/math v1.3.0 // indirect
cosmossdk.io/store v1.1.0 // indirect
filippo.io/edwards25519 v1.0.0 // indirect
github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 // indirect
Expand Down
4 changes: 0 additions & 4 deletions server/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,6 @@ func startAPIServer(
}

// startTelemetry creates the telemetry metrics instance described by
// cfg.Telemetry. When cfg.Telemetry.Enabled is false it returns (nil, nil)
// without calling telemetry.New; otherwise it returns whatever telemetry.New
// returns for that configuration.
func startTelemetry(cfg serverconfig.Config) (*telemetry.Metrics, error) {
if !cfg.Telemetry.Enabled {
return nil, nil
}

return telemetry.New(cfg.Telemetry)
}

Expand Down
10 changes: 10 additions & 0 deletions telemetry/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ import (
"github.com/prometheus/common/expfmt"
)

// globalTelemetryEnabled is an unexported package-level flag that stores the
// telemetry enabled state. New assigns it from Config.Enabled; the intent is
// that it is set on initialization and does not change for the lifetime of
// the program.
var globalTelemetryEnabled bool

// IsTelemetryEnabled reports whether telemetry is enabled by returning the
// current value of globalTelemetryEnabled.
// NOTE(review): the flag is a plain bool read without synchronization; this
// presumably relies on New running before any concurrent readers — confirm.
func IsTelemetryEnabled() bool {
return globalTelemetryEnabled
}

// globalLabels defines the set of global labels that will be applied to all
// metrics emitted using the telemetry package function wrappers.
var globalLabels = []metrics.Label{}
Expand Down Expand Up @@ -95,6 +104,7 @@ type GatherResponse struct {

// New creates a new instance of Metrics
func New(cfg Config) (_ *Metrics, rerr error) {
globalTelemetryEnabled = cfg.Enabled
if !cfg.Enabled {
return nil, nil
}
Expand Down
37 changes: 37 additions & 0 deletions telemetry/wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ func NewLabel(name, value string) metrics.Label {
// metric for a module with a given set of keys. If any global labels are defined,
// they will be added to the module label.
func ModuleMeasureSince(module string, start time.Time, keys ...string) {
if !IsTelemetryEnabled() {
return
}

metrics.MeasureSinceWithLabels(
keys,
start.UTC(),
Expand All @@ -35,6 +39,10 @@ func ModuleMeasureSince(module string, start time.Time, keys ...string) {
// module with a given set of keys. If any global labels are defined, they will
// be added to the module label.
func ModuleSetGauge(module string, val float32, keys ...string) {
if !IsTelemetryEnabled() {
return
}

metrics.SetGaugeWithLabels(
keys,
val,
Expand All @@ -45,29 +53,58 @@ func ModuleSetGauge(module string, val float32, keys ...string) {
// IncrCounter emits a counter metric for keys, incremented by val and tagged
// with the global labels (if any). It does nothing while telemetry is
// disabled.
func IncrCounter(val float32, keys ...string) {
	if IsTelemetryEnabled() {
		metrics.IncrCounterWithLabels(keys, val, globalLabels)
	}
}

// IncrCounterWithLabels provides a wrapper functionality for emitting a counter
// metric with global labels (if any) along with the provided labels. It does
// nothing while telemetry is disabled.
//
// The caller's labels slice is never written to: the global labels are
// appended to a capacity-limited view of it, so any append that adds elements
// allocates a fresh backing array.
func IncrCounterWithLabels(keys []string, val float32, labels []metrics.Label) {
	if !IsTelemetryEnabled() {
		return
	}

	// Cap the slice at its own length. A plain append(labels, ...) would store
	// the global labels in spare capacity of the caller's backing array, which
	// can clobber a sibling slice or race with concurrent callers that share it.
	merged := append(labels[:len(labels):len(labels)], globalLabels...)
	metrics.IncrCounterWithLabels(keys, val, merged)
}

// SetGauge emits a gauge metric for keys with the value val, tagged with the
// global labels (if any). It does nothing while telemetry is disabled.
func SetGauge(val float32, keys ...string) {
	if IsTelemetryEnabled() {
		metrics.SetGaugeWithLabels(keys, val, globalLabels)
	}
}

// SetGaugeWithLabels provides a wrapper functionality for emitting a gauge
// metric with global labels (if any) along with the provided labels. It does
// nothing while telemetry is disabled.
//
// The caller's labels slice is never written to: the global labels are
// appended to a capacity-limited view of it, so any append that adds elements
// allocates a fresh backing array.
func SetGaugeWithLabels(keys []string, val float32, labels []metrics.Label) {
	if !IsTelemetryEnabled() {
		return
	}

	// Cap the slice at its own length. A plain append(labels, ...) would store
	// the global labels in spare capacity of the caller's backing array, which
	// can clobber a sibling slice or race with concurrent callers that share it.
	merged := append(labels[:len(labels):len(labels)], globalLabels...)
	metrics.SetGaugeWithLabels(keys, val, merged)
}

// MeasureSince emits a time measure metric for keys, tagged with the global
// labels (if any), passing start converted to UTC as the beginning of the
// measured interval. It does nothing while telemetry is disabled.
func MeasureSince(start time.Time, keys ...string) {
	if IsTelemetryEnabled() {
		began := start.UTC()
		metrics.MeasureSinceWithLabels(keys, began, globalLabels)
	}
}

// Now returns the current time if telemetry is enabled, or the zero
// time.Time if it is not.
func Now() time.Time {
	if IsTelemetryEnabled() {
		return time.Now()
	}

	return time.Time{}
}
51 changes: 51 additions & 0 deletions telemetry/wrapper_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package telemetry

import (
"sync"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

// mu guards the reset of globalTelemetryEnabled performed by setupTest.
var mu sync.Mutex

// initTelemetry sets globalTelemetryEnabled directly to v, letting tests
// toggle the telemetry state without going through New.
func initTelemetry(v bool) {
globalTelemetryEnabled = v
}

// setupTest puts the package back into the telemetry-disabled state.
// mu is held only while the flag is being reset and is released before
// setupTest returns, so it does not protect the remainder of the calling test.
func setupTest(t *testing.T) {
	t.Helper()

	mu.Lock()
	initTelemetry(false)
	mu.Unlock()
}

// TestNow verifies that Now returns a non-zero time while telemetry is
// enabled and the zero time.Time while it is disabled.
func TestNow(t *testing.T) {
	var zero time.Time

	// Enabled: start from the disabled baseline, then switch telemetry on.
	setupTest(t)
	initTelemetry(true)
	enabledTime := Now()
	assert.NotEqual(t, zero, enabledTime, "Now() should not return zero time when telemetry is enabled")

	// Disabled: return to the baseline and explicitly switch telemetry off.
	setupTest(t)
	initTelemetry(false)
	disabledTime := Now()
	assert.Equal(t, zero, disabledTime, "Now() should return zero time when telemetry is disabled")
}

// TestIsTelemetryEnabled verifies that IsTelemetryEnabled mirrors the value
// most recently stored in globalTelemetryEnabled.
func TestIsTelemetryEnabled(t *testing.T) {
	// Enabled: start from the disabled baseline, then switch telemetry on.
	setupTest(t)
	initTelemetry(true)
	whenOn := IsTelemetryEnabled()
	assert.True(t, whenOn, "IsTelemetryEnabled() should return true when globalTelemetryEnabled is set to true")

	// Disabled: return to the baseline and explicitly switch telemetry off.
	setupTest(t)
	initTelemetry(false)
	whenOff := IsTelemetryEnabled()
	assert.False(t, whenOff, "IsTelemetryEnabled() should return false when globalTelemetryEnabled is set to false")
}
4 changes: 0 additions & 4 deletions x/bank/module.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"time"

gwruntime "github.com/grpc-ecosystem/grpc-gateway/runtime"
"github.com/spf13/cobra"
Expand All @@ -19,7 +18,6 @@ import (
"github.com/cosmos/cosmos-sdk/client"
"github.com/cosmos/cosmos-sdk/codec"
codectypes "github.com/cosmos/cosmos-sdk/codec/types"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/cosmos/cosmos-sdk/types/module"
simtypes "github.com/cosmos/cosmos-sdk/types/simulation"
Expand Down Expand Up @@ -153,10 +151,8 @@ func (AppModule) QuerierRoute() string { return types.RouterKey }
// InitGenesis performs genesis initialization for the bank module: it decodes
// data into a types.GenesisState and passes it to the keeper's InitGenesis.
// The method itself has no return value.
func (am AppModule) InitGenesis(ctx sdk.Context, cdc codec.JSONCodec, data json.RawMessage) {
start := time.Now()
var genesisState types.GenesisState
cdc.MustUnmarshalJSON(data, &genesisState)
// Record how long the unmarshal step took.
// NOTE(review): the metric keys name "crisis" although this is the bank
// module — looks like a copy-paste slip; confirm the intended key.
telemetry.MeasureSince(start, "InitGenesis", "crisis", "unmarshal")

am.keeper.InitGenesis(ctx, &genesisState)
}
Expand Down
4 changes: 0 additions & 4 deletions x/circuit/module.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"time"

gwruntime "github.com/grpc-ecosystem/grpc-gateway/runtime"

Expand All @@ -21,7 +20,6 @@ import (
"github.com/cosmos/cosmos-sdk/codec"
codectypes "github.com/cosmos/cosmos-sdk/codec/types"
"github.com/cosmos/cosmos-sdk/runtime"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/cosmos/cosmos-sdk/types/module"
authtypes "github.com/cosmos/cosmos-sdk/x/auth/types"
Expand Down Expand Up @@ -111,10 +109,8 @@ func (AppModule) ConsensusVersion() uint64 { return ConsensusVersion }
// InitGenesis performs genesis initialization for the circuit module: it
// decodes data into a types.GenesisState and passes it to the keeper's
// InitGenesis. The method itself has no return value.
func (am AppModule) InitGenesis(ctx sdk.Context, cdc codec.JSONCodec, data json.RawMessage) {
start := time.Now()
var genesisState types.GenesisState
cdc.MustUnmarshalJSON(data, &genesisState)
// Record how long the unmarshal step took.
// NOTE(review): the metric keys name "crisis" although this is the circuit
// module — looks like a copy-paste slip; confirm the intended key.
telemetry.MeasureSince(start, "InitGenesis", "crisis", "unmarshal")

am.keeper.InitGenesis(ctx, &genesisState)
}
Expand Down
3 changes: 1 addition & 2 deletions x/crisis/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package crisis

import (
"context"
"time"

"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
Expand All @@ -12,7 +11,7 @@ import (

// check all registered invariants
func EndBlocker(ctx context.Context, k keeper.Keeper) {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyEndBlocker)
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyEndBlocker)

sdkCtx := sdk.UnwrapSDKContext(ctx)
if k.InvCheckPeriod() == 0 || sdkCtx.BlockHeight()%int64(k.InvCheckPeriod()) != 0 {
Expand Down
4 changes: 0 additions & 4 deletions x/crisis/module.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"encoding/json"
"fmt"
"time"

gwruntime "github.com/grpc-ecosystem/grpc-gateway/runtime"
"github.com/spf13/cast"
Expand All @@ -21,7 +20,6 @@ import (
codectypes "github.com/cosmos/cosmos-sdk/codec/types"
"github.com/cosmos/cosmos-sdk/server"
servertypes "github.com/cosmos/cosmos-sdk/server/types"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/cosmos/cosmos-sdk/types/module"
authtypes "github.com/cosmos/cosmos-sdk/x/auth/types"
Expand Down Expand Up @@ -139,10 +137,8 @@ func (am AppModule) RegisterServices(cfg module.Configurator) {
// InitGenesis performs genesis initialization for the crisis module. It returns
// no validator updates.
func (am AppModule) InitGenesis(ctx sdk.Context, cdc codec.JSONCodec, data json.RawMessage) {
start := time.Now()
var genesisState types.GenesisState
cdc.MustUnmarshalJSON(data, &genesisState)
telemetry.MeasureSince(start, "InitGenesis", "crisis", "unmarshal")

am.keeper.InitGenesis(ctx, &genesisState)
if !am.skipGenesisInvariants {
Expand Down
4 changes: 1 addition & 3 deletions x/distribution/abci.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package distribution

import (
"time"

"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/cosmos/cosmos-sdk/x/distribution/keeper"
Expand All @@ -12,7 +10,7 @@ import (
// BeginBlocker sets the proposer for determining distribution during endblock
// and distribute rewards for the previous block.
func BeginBlocker(ctx sdk.Context, k keeper.Keeper) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker)
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)

// determine the total power signing the block
var previousTotalPower int64
Expand Down
3 changes: 1 addition & 2 deletions x/evidence/keeper/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package keeper
import (
"context"
"fmt"
"time"

"cosmossdk.io/core/comet"
"cosmossdk.io/x/evidence/types"
Expand All @@ -15,7 +14,7 @@ import (
// BeginBlocker iterates through and handles any newly discovered evidence of
// misbehavior submitted by CometBFT. Currently, only equivocation is handled.
func (k Keeper) BeginBlocker(ctx context.Context) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker)
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)

bi := k.cometInfo.GetCometBlockInfo(ctx)
if bi == nil {
Expand Down
2 changes: 1 addition & 1 deletion x/gov/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (

// EndBlocker called every block, process inflation, update validator set.
func EndBlocker(ctx sdk.Context, keeper *keeper.Keeper) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyEndBlocker)
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyEndBlocker)

logger := ctx.Logger().With("module", "x/"+types.ModuleName)
// delete dead proposals from store and return their deposits.
Expand Down
3 changes: 1 addition & 2 deletions x/mint/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package mint

import (
"context"
"time"

"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
Expand All @@ -12,7 +11,7 @@ import (

// BeginBlocker mints new tokens for the previous block.
func BeginBlocker(ctx context.Context, k keeper.Keeper, ic types.InflationCalculationFn) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker)
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)

// fetch stored minter & params
minter, err := k.Minter.Get(ctx)
Expand Down
3 changes: 1 addition & 2 deletions x/slashing/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package slashing

import (
"context"
"time"

"cosmossdk.io/core/comet"

Expand All @@ -15,7 +14,7 @@ import (
// BeginBlocker check for infraction evidence or downtime of validators
// on every begin block
func BeginBlocker(ctx context.Context, k keeper.Keeper) error {
defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker)
defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker)

// Iterate over all the validators which *should* have signed this block
// store whether or not they have actually signed it and slash/unbond any
Expand Down
Loading

0 comments on commit 974a24c

Please sign in to comment.