Skip to content

Commit

Permalink
feat(meta cache): stats meta cache unhit in prometheus (risingwavelab…
Browse files Browse the repository at this point in the history
  • Loading branch information
soundOfDestiny authored Apr 10, 2023
1 parent 4929b30 commit fcc069a
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 16 deletions.
2 changes: 1 addition & 1 deletion docker/dashboards/risingwave-dev-dashboard.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docker/dashboards/risingwave-user-dashboard.json

Large diffs are not rendered by default.

26 changes: 24 additions & 2 deletions grafana/risingwave-dev-dashboard.dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -1338,7 +1338,7 @@ def section_frontend(outer_panels):


def section_hummock(panels):
mete_miss_filter = "type='meta_miss'"
meta_miss_filter = "type='meta_miss'"
meta_total_filter = "type='meta_total'"
data_miss_filter = "type='data_miss'"
data_total_filter = "type='data_total'"
Expand Down Expand Up @@ -1568,7 +1568,7 @@ def section_hummock(panels):
"bloom filter miss rate - {{table_id}} - {{type}} @ {{job}} @ {{instance}}",
),
panels.target(
f"(sum(rate({metric('state_store_sst_store_block_request_counts', mete_miss_filter)}[$__rate_interval])) by (job,instance,table_id)) / (sum(rate({metric('state_store_sst_store_block_request_counts', meta_total_filter)}[$__rate_interval])) by (job,instance,table_id))",
f"(sum(rate({metric('state_store_sst_store_block_request_counts', meta_miss_filter)}[$__rate_interval])) by (job,instance,table_id)) / (sum(rate({metric('state_store_sst_store_block_request_counts', meta_total_filter)}[$__rate_interval])) by (job,instance,table_id))",
"meta cache miss rate - {{table_id}} @ {{job}} @ {{instance}}",
),
panels.target(
Expand Down Expand Up @@ -1750,6 +1750,28 @@ def section_hummock(panels):
),
],
),

panels.timeseries_count(
"Fetch Meta Unhits",
"",
[
panels.target(
f"{metric('state_store_iter_fetch_meta_cache_unhits')}",
"",
),
],
),

panels.timeseries_count(
"Slow Fetch Meta Unhits",
"",
[
panels.target(
f"{metric('state_store_iter_slow_fetch_meta_cache_unhits')}",
"",
),
],
),
]


Expand Down
2 changes: 1 addition & 1 deletion grafana/risingwave-dev-dashboard.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion grafana/risingwave-user-dashboard.json

Large diffs are not rendered by default.

25 changes: 19 additions & 6 deletions src/storage/src/hummock/sstable_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use await_tree::InstrumentAwait;
use bytes::{Buf, BufMut, Bytes};
use fail::fail_point;
use itertools::Itertools;
use risingwave_common::cache::{CachePriority, LruCacheEventListener};
use risingwave_common::cache::{CachePriority, LookupResponse, LruCacheEventListener};
use risingwave_hummock_sdk::{HummockSstableObjectId, OBJECT_SUFFIX};
use risingwave_object_store::object::{
BlockLocation, MonitoredStreamingReader, ObjectError, ObjectMetadata, ObjectStoreRef,
Expand Down Expand Up @@ -354,14 +354,17 @@ impl SstableStore {
self.meta_cache.clear();
}

/// Returns the `table_holder` together with two counters: `local_cache_meta_block_miss`
/// (1 on a cache miss) and `local_cache_meta_block_unhit` (1 unless the meta cache lookup
/// response was `LookupResponse::Cached`).
pub async fn sstable_syncable(
&self,
sst: &SstableInfo,
stats: &StoreLocalStatistic,
) -> HummockResult<(TableHolder, u64)> {
) -> HummockResult<(TableHolder, u64, u64)> {
let mut local_cache_meta_block_miss = 0;
let mut local_cache_meta_block_unhit = 0;
let object_id = sst.get_object_id();
let result = self
let lookup_response = self
.meta_cache
.lookup_with_request_dedup::<_, HummockError, _>(
object_id,
Expand Down Expand Up @@ -390,10 +393,20 @@ impl SstableStore {
Ok((Box::new(sst), charge))
}
},
)
);
if !matches!(lookup_response, LookupResponse::Cached(..)) {
local_cache_meta_block_unhit += 1;
}
let result = lookup_response
.verbose_instrument_await("meta_cache_lookup")
.await;
result.map(|table_holder| (table_holder, local_cache_meta_block_miss))
result.map(|table_holder| {
(
table_holder,
local_cache_meta_block_miss,
local_cache_meta_block_unhit,
)
})
}

pub async fn sstable(
Expand All @@ -402,7 +415,7 @@ impl SstableStore {
stats: &mut StoreLocalStatistic,
) -> HummockResult<TableHolder> {
self.sstable_syncable(sst, stats).await.map(
|(table_holder, local_cache_meta_block_miss)| {
|(table_holder, local_cache_meta_block_miss, ..)| {
stats.apply_meta_fetch(local_cache_meta_block_miss);
table_holder
},
Expand Down
19 changes: 16 additions & 3 deletions src/storage/src/hummock/store/version.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,8 @@ impl HummockVersionReader {
}
}

/// Threshold, in seconds, above which the meta fetch performed while creating an iterator
/// is treated as slow: a warning is logged and the slow-fetch cache-unhit gauge is updated.
const SLOW_ITER_FETCH_META_DURATION_SECOND: f64 = 5.0;

impl HummockVersionReader {
pub async fn get(
&self,
Expand Down Expand Up @@ -653,12 +655,23 @@ impl HummockVersionReader {
.iter_fetch_meta_duration
.with_label_values(&[table_id_label])
.start_timer();
let mut local_cache_meta_block_unhit = 0;
let mut flatten_resps = vec![None; req_count];
for flatten_req in flatten_reqs {
let (req_index, resp) = flatten_req.await?;
local_cache_meta_block_unhit += resp.2;
flatten_resps[req_count - req_index - 1] = Some(resp);
}
timer.observe_duration();
let fetch_meta_duration_sec = timer.stop_and_record();
self.state_store_metrics
.iter_fetch_meta_cache_unhits
.set(local_cache_meta_block_unhit as i64);
if fetch_meta_duration_sec > SLOW_ITER_FETCH_META_DURATION_SECOND {
tracing::warn!("Fetching meta while creating an iter to read table_id {:?} at epoch {:?} is slow: duration = {:?}s, cache unhits = {:?}.", table_id_string, epoch, fetch_meta_duration_sec, local_cache_meta_block_unhit);
self.state_store_metrics
.iter_slow_fetch_meta_cache_unhits
.set(local_cache_meta_block_unhit as i64);
}

let mut sst_read_options = SstableIteratorReadOptions::from(&read_options);
if read_options.prefetch_options.exhaust_iter {
Expand All @@ -671,7 +684,7 @@ impl HummockVersionReader {
if level_type == LevelType::Nonoverlapping as i32 {
let mut sstables = vec![];
for sstable_info in fetch_meta_req {
let (sstable, local_cache_meta_block_miss) =
let (sstable, local_cache_meta_block_miss, ..) =
flatten_resps.pop().unwrap().unwrap();
assert_eq!(sstable_info.get_object_id(), sstable.value().id);
local_stats.apply_meta_fetch(local_cache_meta_block_miss);
Expand All @@ -697,7 +710,7 @@ impl HummockVersionReader {
} else {
let mut iters = Vec::new();
for sstable_info in fetch_meta_req {
let (sstable, local_cache_meta_block_miss) =
let (sstable, local_cache_meta_block_miss, ..) =
flatten_resps.pop().unwrap().unwrap();
assert_eq!(sstable_info.get_object_id(), sstable.value().id);
local_stats.apply_meta_fetch(local_cache_meta_block_miss);
Expand Down
21 changes: 20 additions & 1 deletion src/storage/src/monitor/hummock_state_store_metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ use std::sync::Arc;
use prometheus::core::{AtomicU64, Collector, Desc, GenericCounterVec};
use prometheus::{
exponential_buckets, histogram_opts, proto, register_histogram_vec_with_registry,
register_int_counter_vec_with_registry, HistogramVec, IntGauge, Opts, Registry,
register_int_counter_vec_with_registry, register_int_gauge_with_registry, HistogramVec,
IntGauge, Opts, Registry,
};

/// [`HummockStateStoreMetrics`] stores the performance and IO metrics of `XXXStore` such as
Expand All @@ -35,6 +36,8 @@ pub struct HummockStateStoreMetrics {
pub get_shared_buffer_hit_counts: GenericCounterVec<AtomicU64>,
pub remote_read_time: HistogramVec,
pub iter_fetch_meta_duration: HistogramVec,
pub iter_fetch_meta_cache_unhits: IntGauge,
pub iter_slow_fetch_meta_cache_unhits: IntGauge,

pub read_req_bloom_filter_positive_counts: GenericCounterVec<AtomicU64>,
pub read_req_positive_but_non_exist_counts: GenericCounterVec<AtomicU64>,
Expand Down Expand Up @@ -113,6 +116,20 @@ impl HummockStateStoreMetrics {
let iter_fetch_meta_duration =
register_histogram_vec_with_registry!(opts, &["table_id"], registry).unwrap();

let iter_fetch_meta_cache_unhits = register_int_gauge_with_registry!(
"state_store_iter_fetch_meta_cache_unhits",
"Number of SST meta cache unhit during one iterator meta fetch",
registry
)
.unwrap();

let iter_slow_fetch_meta_cache_unhits = register_int_gauge_with_registry!(
"state_store_iter_slow_fetch_meta_cache_unhits",
"Number of SST meta cache unhit during a iterator meta fetch which is slow (costs >5 seconds)",
registry
)
.unwrap();

// ----- write_batch -----
let write_batch_tuple_counts = register_int_counter_vec_with_registry!(
"state_store_write_batch_tuple_counts",
Expand Down Expand Up @@ -171,6 +188,8 @@ impl HummockStateStoreMetrics {
get_shared_buffer_hit_counts,
remote_read_time,
iter_fetch_meta_duration,
iter_fetch_meta_cache_unhits,
iter_slow_fetch_meta_cache_unhits,
read_req_bloom_filter_positive_counts,
read_req_positive_but_non_exist_counts,
read_req_check_bloom_filter_counts,
Expand Down

0 comments on commit fcc069a

Please sign in to comment.