-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Export expvar metrics of badger to the metricsFactory #1704
Changes from 2 commits
7522e22
38595bb
c9544d3
8a71919
ac096cd
680e1e6
2e05a8e
40438c1
7fc7581
b23abb3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,12 +15,18 @@ | |
package badger | ||
|
||
import ( | ||
"expvar" | ||
"flag" | ||
"io/ioutil" | ||
"os" | ||
"strings" | ||
"time" | ||
|
||
"github.com/dgraph-io/badger" | ||
"github.com/dgraph-io/badger/options" | ||
|
||
// init() badger's metrics to make them available in Initialize() | ||
_ "github.com/dgraph-io/badger/y" | ||
"github.com/spf13/viper" | ||
"github.com/uber/jaeger-lib/metrics" | ||
"go.uber.org/zap" | ||
|
@@ -58,6 +64,9 @@ type Factory struct { | |
LastMaintenanceRun metrics.Gauge | ||
// LastValueLogCleaned stores the timestamp (UnixNano) of the previous ValueLogGC run | ||
LastValueLogCleaned metrics.Gauge | ||
|
||
// Expose badger's internal expvar metrics, which are all gauge's at this point | ||
badgerMetrics map[string]metrics.Gauge | ||
} | ||
} | ||
|
||
|
@@ -84,6 +93,7 @@ func (f *Factory) Initialize(metricsFactory metrics.Factory, logger *zap.Logger) | |
f.logger = logger | ||
|
||
opts := badger.DefaultOptions | ||
opts.TableLoadingMode = options.MemoryMap | ||
|
||
if f.Options.primary.Ephemeral { | ||
opts.SyncWrites = false | ||
|
@@ -118,7 +128,10 @@ func (f *Factory) Initialize(metricsFactory metrics.Factory, logger *zap.Logger) | |
f.metrics.LastMaintenanceRun = metricsFactory.Gauge(metrics.Options{Name: lastMaintenanceRunName}) | ||
f.metrics.LastValueLogCleaned = metricsFactory.Gauge(metrics.Options{Name: lastValueLogCleanedName}) | ||
|
||
f.registerBadgerExpvarMetrics(metricsFactory) | ||
|
||
go f.maintenance() | ||
go f.metricsCopier() | ||
|
||
logger.Info("Badger storage configuration", zap.Any("configuration", opts)) | ||
|
||
|
@@ -150,7 +163,8 @@ func (f *Factory) CreateDependencyReader() (dependencystore.Reader, error) { | |
|
||
// Close Implements io.Closer and closes the underlying storage | ||
func (f *Factory) Close() error { | ||
f.maintenanceDone <- true | ||
f.maintenanceDone <- true // maintenance close | ||
f.maintenanceDone <- true // metrics close | ||
|
||
err := f.store.Close() | ||
|
||
|
@@ -175,6 +189,7 @@ func (f *Factory) maintenance() { | |
return | ||
case t := <-maintenanceTicker.C: | ||
var err error | ||
|
||
// After there's nothing to clean, the err is raised | ||
for err == nil { | ||
err = f.store.RunValueLogGC(0.5) // 0.5 is selected to rewrite a file if half of it can be discarded | ||
|
@@ -190,3 +205,56 @@ func (f *Factory) maintenance() { | |
} | ||
} | ||
} | ||
|
||
func (f *Factory) metricsCopier() { | ||
metricsTicker := time.NewTicker(f.Options.primary.MetricsUpdateInterval) | ||
defer metricsTicker.Stop() | ||
for { | ||
select { | ||
case <-f.maintenanceDone: | ||
return | ||
case <-metricsTicker.C: | ||
expvar.Do(func(kv expvar.KeyValue) { | ||
if strings.HasPrefix(kv.Key, "badger") { | ||
if intVal, ok := kv.Value.(*expvar.Int); ok { | ||
if g, found := f.metrics.badgerMetrics[kv.Key]; found { | ||
g.Update(intVal.Value()) | ||
} | ||
} else if mapVal, ok := kv.Value.(*expvar.Map); ok { | ||
mapVal.Do(func(innerKv expvar.KeyValue) { | ||
// The metrics we're interested in have only a single inner key (dynamic name) | ||
// and we're only interested in its value | ||
if intVal, ok := innerKv.Value.(*expvar.Int); ok { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is catching the metrics declared like this? NumLSMGets = expvar.NewMap("badger_lsm_level_gets_total") Why does Badger use a map in this case? It looks like it could have more than one entry in the map, but we're collapsing all under the map's name. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. btw, if multiple values are possible, we could put that value into a tag on the metric. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Currently it is storing in the map the directory of the LSM or Value. We have that information in the Factory properties also, but it might indeed make sense to export that information somewhere. Unless there's a place to view properties already in Jaeger? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With the tags it would look like this:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as long as there's a guarantee that map has only one entry, it makes more sense to go without the dir name in the label (since it might be different after restart) |
||
if g, found := f.metrics.badgerMetrics[kv.Key]; found { | ||
g.Update(intVal.Value()) | ||
} | ||
} | ||
}) | ||
} | ||
} | ||
}) | ||
} | ||
} | ||
} | ||
|
||
func (f *Factory) registerBadgerExpvarMetrics(metricsFactory metrics.Factory) { | ||
f.metrics.badgerMetrics = make(map[string]metrics.Gauge) | ||
|
||
expvar.Do(func(kv expvar.KeyValue) { | ||
if strings.HasPrefix(kv.Key, "badger") { | ||
if _, ok := kv.Value.(*expvar.Int); ok { | ||
g := metricsFactory.Gauge(metrics.Options{Name: kv.Key}) | ||
f.metrics.badgerMetrics[kv.Key] = g | ||
} else if mapVal, ok := kv.Value.(*expvar.Map); ok { | ||
mapVal.Do(func(innerKv expvar.KeyValue) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wouldn't this map always be empty at this point? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, since badger.Open() is called before the metrics are fetched. And badger.Open() registers the expvars (it calculates the initial sizes during Open()). |
||
// The metrics we're interested in have only a single inner key (dynamic name) | ||
// and we're only interested in its value | ||
if _, ok = innerKv.Value.(*expvar.Int); ok { | ||
g := metricsFactory.Gauge(metrics.Options{Name: kv.Key}) | ||
f.metrics.badgerMetrics[kv.Key] = g | ||
} | ||
}) | ||
} | ||
} | ||
}) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -144,3 +144,33 @@ func TestMaintenanceCodecov(t *testing.T) { | |
_ = f.store.Close() | ||
waiter() // This should trigger the logging of error | ||
} | ||
|
||
func TestBadgerMetrics(t *testing.T) { | ||
f := NewFactory() | ||
v, command := config.Viperize(f.AddFlags) | ||
command.ParseFlags([]string{ | ||
"--badger.metrics-update-interval=10ms", | ||
}) | ||
f.InitFromViper(v) | ||
mFactory := metricstest.NewFactory(0) | ||
f.Initialize(mFactory, zap.NewNop()) | ||
assert.NotNil(t, f.metrics.badgerMetrics) | ||
_, found := f.metrics.badgerMetrics["badger_memtable_gets_total"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would be good to see a test for the expvar.Map metrics as well. |
||
assert.True(t, found) | ||
|
||
waiter := func(previousValue int64) int64 { | ||
sleeps := 0 | ||
_, gs := mFactory.Snapshot() | ||
for gs["adger_memtable_gets_total"] == previousValue && sleeps < 8 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why "adger"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typo. It always sleeps the maximum with that typo, so the test functions correctly - but might take too long. I'll fix that. |
||
// Wait for the scheduler | ||
time.Sleep(time.Duration(50) * time.Millisecond) | ||
sleeps++ | ||
_, gs = mFactory.Snapshot() | ||
} | ||
assert.True(t, gs["badger_memtable_gets_total"] > previousValue) | ||
return gs["badger_memtable_gets_total"] | ||
} | ||
|
||
vlogSize := waiter(0) | ||
assert.True(t, vlogSize > 0) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't it be
metricsTicker
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, the ticker is closed when the select ends.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why are we writing twice to the same channel? If it's only meant to be read-once for exit signal, then I think the usual practice for that is to close the channel.