RocksDB Statistics in Metrics #1169
Changes from 17 commits
84a1310
7130281
968f1c6
5ec4f31
fe2731c
41b2ea8
427370a
284611b
d0c02c7
144f332
dd7e72a
c4d9a4a
917a375
f99ecb0
363ca4f
54cffa1
d48e72e
8fe3c6e
59542cf
@@ -0,0 +1,198 @@
package tech.pegasys.pantheon.services.kvstore;

import static tech.pegasys.pantheon.metrics.MetricCategory.KVSTORE_ROCKSDB_STATS;

import tech.pegasys.pantheon.metrics.prometheus.PrometheusMetricsSystem;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import io.prometheus.client.Collector;
import org.rocksdb.HistogramData;
import org.rocksdb.HistogramType;
import org.rocksdb.Statistics;
import org.rocksdb.TickerType;

class RocksDBStats {

  static final List<String> LABELS = Collections.singletonList("quantile");
  static final List<String> LABEL_50 = Collections.singletonList("0.5");
  static final List<String> LABEL_95 = Collections.singletonList("0.95");
  static final List<String> LABEL_99 = Collections.singletonList("0.99");

  // Tickers - RocksDB equivalent of counters
  static final TickerType[] TICKERS = {
    TickerType.BLOCK_CACHE_ADD,
    TickerType.BLOCK_CACHE_HIT,
    TickerType.BLOCK_CACHE_ADD_FAILURES,
    TickerType.BLOCK_CACHE_INDEX_MISS,
    TickerType.BLOCK_CACHE_INDEX_HIT,
    TickerType.BLOCK_CACHE_INDEX_ADD,
    TickerType.BLOCK_CACHE_INDEX_BYTES_INSERT,
    TickerType.BLOCK_CACHE_INDEX_BYTES_EVICT,
    TickerType.BLOCK_CACHE_FILTER_MISS,
    TickerType.BLOCK_CACHE_FILTER_HIT,
    TickerType.BLOCK_CACHE_FILTER_ADD,
    TickerType.BLOCK_CACHE_FILTER_BYTES_INSERT,
    TickerType.BLOCK_CACHE_FILTER_BYTES_EVICT,
    TickerType.BLOCK_CACHE_DATA_MISS,
    TickerType.BLOCK_CACHE_DATA_HIT,
    TickerType.BLOCK_CACHE_DATA_ADD,
    TickerType.BLOCK_CACHE_DATA_BYTES_INSERT,
    TickerType.BLOCK_CACHE_BYTES_READ,
    TickerType.BLOCK_CACHE_BYTES_WRITE,
    TickerType.BLOOM_FILTER_USEFUL,
    TickerType.PERSISTENT_CACHE_HIT,
    TickerType.PERSISTENT_CACHE_MISS,
    TickerType.SIM_BLOCK_CACHE_HIT,
    TickerType.SIM_BLOCK_CACHE_MISS,
    TickerType.MEMTABLE_HIT,
    TickerType.MEMTABLE_MISS,
    TickerType.GET_HIT_L0,
    TickerType.GET_HIT_L1,
    TickerType.GET_HIT_L2_AND_UP,
    TickerType.COMPACTION_KEY_DROP_NEWER_ENTRY,
    TickerType.COMPACTION_KEY_DROP_OBSOLETE,
    TickerType.COMPACTION_KEY_DROP_RANGE_DEL,
    TickerType.COMPACTION_KEY_DROP_USER,
    TickerType.COMPACTION_RANGE_DEL_DROP_OBSOLETE,
    TickerType.NUMBER_KEYS_WRITTEN,
    TickerType.NUMBER_KEYS_READ,
    TickerType.NUMBER_KEYS_UPDATED,
    TickerType.BYTES_WRITTEN,
    TickerType.BYTES_READ,
    TickerType.NUMBER_DB_SEEK,
    TickerType.NUMBER_DB_NEXT,
    TickerType.NUMBER_DB_PREV,
    TickerType.NUMBER_DB_SEEK_FOUND,
    TickerType.NUMBER_DB_NEXT_FOUND,
    TickerType.NUMBER_DB_PREV_FOUND,
    TickerType.ITER_BYTES_READ,
    TickerType.NO_FILE_CLOSES,
    TickerType.NO_FILE_OPENS,
    TickerType.NO_FILE_ERRORS,
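    // The stall tickers below are intentionally omitted; they are kept here, commented out,
    // so that their absence is visibly deliberate (see the review thread below).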
    // TickerType.STALL_L0_SLOWDOWN_MICROS,
    // TickerType.STALL_MEMTABLE_COMPACTION_MICROS,
    // TickerType.STALL_L0_NUM_FILES_MICROS,
Review comment: I assume we want to keep these here for visibility of all of the options?
Reply: Yes, to make it clear their absence is intentional.
Reply: (optional) Might be worth an explanatory comment :)
    TickerType.STALL_MICROS,
    TickerType.DB_MUTEX_WAIT_MICROS,
    TickerType.RATE_LIMIT_DELAY_MILLIS,
    TickerType.NO_ITERATORS,
    TickerType.NUMBER_MULTIGET_BYTES_READ,
    TickerType.NUMBER_MULTIGET_KEYS_READ,
    TickerType.NUMBER_MULTIGET_CALLS,
    TickerType.NUMBER_FILTERED_DELETES,
    TickerType.NUMBER_MERGE_FAILURES,
    TickerType.BLOOM_FILTER_PREFIX_CHECKED,
    TickerType.BLOOM_FILTER_PREFIX_USEFUL,
    TickerType.NUMBER_OF_RESEEKS_IN_ITERATION,
    TickerType.GET_UPDATES_SINCE_CALLS,
    TickerType.BLOCK_CACHE_COMPRESSED_MISS,
    TickerType.BLOCK_CACHE_COMPRESSED_HIT,
    TickerType.BLOCK_CACHE_COMPRESSED_ADD,
    TickerType.BLOCK_CACHE_COMPRESSED_ADD_FAILURES,
    TickerType.WAL_FILE_SYNCED,
    TickerType.WAL_FILE_BYTES,
    TickerType.WRITE_DONE_BY_SELF,
    TickerType.WRITE_DONE_BY_OTHER,
    TickerType.WRITE_TIMEDOUT,
    TickerType.WRITE_WITH_WAL,
    TickerType.COMPACT_READ_BYTES,
    TickerType.COMPACT_WRITE_BYTES,
    TickerType.FLUSH_WRITE_BYTES,
    TickerType.NUMBER_DIRECT_LOAD_TABLE_PROPERTIES,
    TickerType.NUMBER_SUPERVERSION_ACQUIRES,
    TickerType.NUMBER_SUPERVERSION_RELEASES,
    TickerType.NUMBER_SUPERVERSION_CLEANUPS,
    TickerType.NUMBER_BLOCK_COMPRESSED,
    TickerType.NUMBER_BLOCK_DECOMPRESSED,
    TickerType.NUMBER_BLOCK_NOT_COMPRESSED,
    TickerType.MERGE_OPERATION_TOTAL_TIME,
    TickerType.FILTER_OPERATION_TOTAL_TIME,
    TickerType.ROW_CACHE_HIT,
    TickerType.ROW_CACHE_MISS,
    TickerType.READ_AMP_ESTIMATE_USEFUL_BYTES,
    TickerType.READ_AMP_TOTAL_READ_BYTES,
    TickerType.NUMBER_RATE_LIMITER_DRAINS,
    TickerType.NUMBER_ITER_SKIP,
    TickerType.NUMBER_MULTIGET_KEYS_FOUND,
  };

  // Histograms - treated as prometheus summaries
  static final HistogramType[] HISTOGRAMS = {
    HistogramType.DB_GET,
    HistogramType.DB_WRITE,
    HistogramType.COMPACTION_TIME,
    HistogramType.SUBCOMPACTION_SETUP_TIME,
    HistogramType.TABLE_SYNC_MICROS,
    HistogramType.COMPACTION_OUTFILE_SYNC_MICROS,
    HistogramType.WAL_FILE_SYNC_MICROS,
    HistogramType.MANIFEST_FILE_SYNC_MICROS,
    HistogramType.TABLE_OPEN_IO_MICROS,
    HistogramType.DB_MULTIGET,
    HistogramType.READ_BLOCK_COMPACTION_MICROS,
    HistogramType.READ_BLOCK_GET_MICROS,
    HistogramType.WRITE_RAW_BLOCK_MICROS,
    HistogramType.STALL_L0_SLOWDOWN_COUNT,
    HistogramType.STALL_MEMTABLE_COMPACTION_COUNT,
    HistogramType.STALL_L0_NUM_FILES_COUNT,
    HistogramType.HARD_RATE_LIMIT_DELAY_COUNT,
    HistogramType.SOFT_RATE_LIMIT_DELAY_COUNT,
    HistogramType.NUM_FILES_IN_SINGLE_COMPACTION,
    HistogramType.DB_SEEK,
    HistogramType.WRITE_STALL,
    HistogramType.SST_READ_MICROS,
    HistogramType.NUM_SUBCOMPACTIONS_SCHEDULED,
    HistogramType.BYTES_PER_READ,
    HistogramType.BYTES_PER_WRITE,
    HistogramType.BYTES_PER_MULTIGET,
    HistogramType.BYTES_COMPRESSED,
    HistogramType.BYTES_DECOMPRESSED,
    HistogramType.COMPRESSION_TIMES_NANOS,
    HistogramType.DECOMPRESSION_TIMES_NANOS,
    HistogramType.READ_NUM_MERGE_OPERANDS,
  };

  static void registerRocksDBMetrics(
      final Statistics stats, final PrometheusMetricsSystem metricsSystem) {
Review comment: If you move this to the …
Reply: That would give the metrics package a dependency on RocksDB, and then every package that uses metrics would drag that dependency along as deadweight.
Reply: Yeah, that's a good point. And that's the case now with vertx. I guess the "right" thing to do would be to have independent …

    for (final TickerType ticker : TICKERS) {
      final String promCounterName = ticker.name().toLowerCase();
      metricsSystem.createLongGauge(
          KVSTORE_ROCKSDB_STATS,
          promCounterName,
          "RocksDB reported statistics for " + ticker.name(),
          () -> stats.getTickerCount(ticker));
    }

    for (final HistogramType histogram : HISTOGRAMS) {
      metricsSystem.addCollector(KVSTORE_ROCKSDB_STATS, histogramToCollector(stats, histogram));
    }
  }

  private static Collector histogramToCollector(
      final Statistics stats, final HistogramType histogram) {
    return new Collector() {
      final String metricName =
          PrometheusMetricsSystem.convertToPrometheusName(
              KVSTORE_ROCKSDB_STATS, histogram.name().toLowerCase());

      @Override
      public List<MetricFamilySamples> collect() {
        final HistogramData data = stats.getHistogramData(histogram);
        return Collections.singletonList(
            new MetricFamilySamples(
                metricName,
                Type.SUMMARY,
                "RocksDB histogram for " + metricName,
                Arrays.asList(
                    new MetricFamilySamples.Sample(metricName, LABELS, LABEL_50, data.getMedian()),
                    new MetricFamilySamples.Sample(
                        metricName, LABELS, LABEL_95, data.getPercentile95()),
                    new MetricFamilySamples.Sample(
                        metricName, LABELS, LABEL_99, data.getPercentile99()))));
      }
    };
  }
}
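For context before the RocksDbKeyValueStorage diff below, a minimal sketch of how this class is meant to be wired up, mirroring those changes. The helper name and database-path parameter are illustrative only; a real caller would also need the org.rocksdb Options, RocksDBException, TransactionDB, and TransactionDBOptions imports plus RocksDbUtil, and would have to live in the same package because registerRocksDBMetrics is package-private.

  // Hypothetical caller: share one Statistics instance between the DB options and the
  // metrics system, so tickers surface as long gauges and histograms as summaries.
  static TransactionDB openWithRocksDBStats(
      final String databaseDir, final PrometheusMetricsSystem metricsSystem)
      throws RocksDBException {
    RocksDbUtil.loadNativeLibrary(); // native library must be loaded before any RocksDB call
    final Statistics stats = new Statistics();
    final Options options = new Options().setCreateIfMissing(true).setStatistics(stats);
    final TransactionDB db =
        TransactionDB.open(options, new TransactionDBOptions(), databaseDir);
    RocksDBStats.registerRocksDBMetrics(stats, metricsSystem);
    return db;
  }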
@@ -16,6 +16,7 @@
 import tech.pegasys.pantheon.metrics.MetricCategory;
 import tech.pegasys.pantheon.metrics.MetricsSystem;
 import tech.pegasys.pantheon.metrics.OperationTimer;
+import tech.pegasys.pantheon.metrics.prometheus.PrometheusMetricsSystem;
 import tech.pegasys.pantheon.services.util.RocksDbUtil;
 import tech.pegasys.pantheon.util.bytes.BytesValue;

@@ -27,6 +28,7 @@
 import org.apache.logging.log4j.Logger;
 import org.rocksdb.Options;
 import org.rocksdb.RocksDBException;
+import org.rocksdb.Statistics;
 import org.rocksdb.TransactionDB;
 import org.rocksdb.TransactionDBOptions;
 import org.rocksdb.WriteOptions;

@@ -45,6 +47,7 @@ public class RocksDbKeyValueStorage implements KeyValueStorage, Closeable {
   private final OperationTimer writeLatency;
   private final OperationTimer commitLatency;
   private final Counter rollbackCount;
+  private final Statistics stats;

   public static KeyValueStorage create(
       final RocksDbConfiguration rocksDbConfiguration, final MetricsSystem metricsSystem)
@@ -56,33 +59,44 @@ private RocksDbKeyValueStorage(
       final RocksDbConfiguration rocksDbConfiguration, final MetricsSystem metricsSystem) {
     RocksDbUtil.loadNativeLibrary();
     try {
+      stats = new Statistics();
       options =
           new Options()
               .setCreateIfMissing(true)
               .setMaxOpenFiles(rocksDbConfiguration.getMaxOpenFiles())
-              .setTableFormatConfig(rocksDbConfiguration.getBlockBasedTableConfig());
+              .setTableFormatConfig(rocksDbConfiguration.getBlockBasedTableConfig())
+              .setStatistics(stats);

       txOptions = new TransactionDBOptions();
       db = TransactionDB.open(options, txOptions, rocksDbConfiguration.getDatabaseDir().toString());

       readLatency =
           metricsSystem.createTimer(
-              MetricCategory.ROCKSDB, "read_latency_seconds", "Latency for read from RocksDB.");
+              MetricCategory.KVSTORE_ROCKSDB,
+              "read_latency_seconds",
+              "Latency for read from RocksDB.");
       removeLatency =
           metricsSystem.createTimer(
-              MetricCategory.ROCKSDB,
+              MetricCategory.KVSTORE_ROCKSDB,
               "remove_latency_seconds",
               "Latency of remove requests from RocksDB.");
       writeLatency =
           metricsSystem.createTimer(
-              MetricCategory.ROCKSDB, "write_latency_seconds", "Latency for write to RocksDB.");
+              MetricCategory.KVSTORE_ROCKSDB,
+              "write_latency_seconds",
+              "Latency for write to RocksDB.");
       commitLatency =
           metricsSystem.createTimer(
-              MetricCategory.ROCKSDB, "commit_latency_seconds", "Latency for commits to RocksDB.");
+              MetricCategory.KVSTORE_ROCKSDB,
+              "commit_latency_seconds",
+              "Latency for commits to RocksDB.");

+      if (metricsSystem instanceof PrometheusMetricsSystem) {
+        RocksDBStats.registerRocksDBMetrics(stats, (PrometheusMetricsSystem) metricsSystem);
+      }
Review comment: Can you wrap all of this new logic up in a utility class in the metrics package? That way we can reuse this for other RocksDB instances as needed. It looks like you could create a …
Reply: If we keep the statistics object in rocksdb and treat it like a part of the options then we can have …

       metricsSystem.createLongGauge(
-          MetricCategory.ROCKSDB,
+          MetricCategory.KVSTORE_ROCKSDB,
           "rocks_db_table_readers_memory_bytes",
           "Estimated memory used for RocksDB index and filter blocks in bytes",
           () -> {
@@ -96,7 +110,7 @@ private RocksDbKeyValueStorage(

       rollbackCount =
           metricsSystem.createCounter(
-              MetricCategory.ROCKSDB,
+              MetricCategory.KVSTORE_ROCKSDB,
               "rollback_count",
               "Number of RocksDB transactions rolled back.");
     } catch (final RocksDBException e) {
Review comment: What do you think about moving this to tech.pegasys.pantheon.metrics.rocksdb? We have a similar paradigm for vertx utilities in tech.pegasys.pantheon.metrics.vertx.
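For illustration only, a rough sketch of what a shared helper could look like if the registration logic were lifted into the suggested package. The package and class names are hypothetical, this is not part of the PR, and, as noted in the thread above, this placement would give the metrics module a compile-time dependency on RocksDB.

package tech.pegasys.pantheon.metrics.rocksdb; // hypothetical location, per the suggestion above

import static tech.pegasys.pantheon.metrics.MetricCategory.KVSTORE_ROCKSDB_STATS;

import tech.pegasys.pantheon.metrics.prometheus.PrometheusMetricsSystem;

import org.rocksdb.Statistics;
import org.rocksdb.TickerType;

// Hypothetical reusable helper: any RocksDB-backed store could register its own
// Statistics instance, using the same createLongGauge call shown in RocksDBStats above.
public final class RocksDBStatsCollector {

  private RocksDBStatsCollector() {}

  public static void registerTickers(
      final Statistics stats,
      final PrometheusMetricsSystem metricsSystem,
      final TickerType... tickers) {
    for (final TickerType ticker : tickers) {
      metricsSystem.createLongGauge(
          KVSTORE_ROCKSDB_STATS,
          ticker.name().toLowerCase(),
          "RocksDB reported statistics for " + ticker.name(),
          () -> stats.getTickerCount(ticker));
    }
  }
}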