Skip to content

Commit

Permalink
chore(metric): add metric for hummock full GC (#10264)
Browse files Browse the repository at this point in the history
  • Loading branch information
zwang28 authored Jun 15, 2023
1 parent 65f05dd commit d95d3a2
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 8 deletions.
2 changes: 1 addition & 1 deletion docker/dashboards/risingwave-dev-dashboard.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docker/dashboards/risingwave-user-dashboard.json

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions grafana/risingwave-dev-dashboard.dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -2263,6 +2263,22 @@ def section_hummock_manager(outer_panels):
"compaction_group_{{compaction_group_id}}"),
],
),
panels.timeseries_count(
"Full GC Trigger Count",
"total number of attempts to trigger full GC",
[
panels.target(f"{metric('storage_full_gc_trigger_count')}",
"full_gc_trigger_count"),
],
),
panels.timeseries_count(
"Full GC Last Watermark",
"the object id watermark used in last full GC",
[
panels.target(f"{metric('storage_full_gc_last_object_id_watermark')}",
"full_gc_last_object_id_watermark"),
],
),
],
)
]
Expand Down
2 changes: 1 addition & 1 deletion grafana/risingwave-dev-dashboard.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion grafana/risingwave-user-dashboard.json

Large diffs are not rendered by default.

17 changes: 13 additions & 4 deletions src/meta/src/hummock/vacuum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ where
///
/// Returns Ok(false) if there is no worker available.
pub async fn start_full_gc(&self, sst_retention_time: Duration) -> Result<bool> {
self.hummock_manager.metrics.full_gc_trigger_count.inc();
// Set a minimum sst_retention_time to avoid deleting SSTs of on-going write op.
let sst_retention_time = cmp::max(
sst_retention_time,
Expand Down Expand Up @@ -246,27 +247,35 @@ where
tracing::info!("SST full scan returns no SSTs.");
return Ok(0);
}
let metrics = &self.hummock_manager.metrics;
let spin_interval =
Duration::from_secs(self.env.opts.collect_gc_watermark_spin_interval_sec);
let watermark = collect_global_gc_watermark(
self.hummock_manager.cluster_manager().clone(),
spin_interval,
)
.await?;
let sst_number = object_ids.len();
metrics.full_gc_last_object_id_watermark.set(watermark as _);
let candidate_sst_number = object_ids.len();
metrics
.full_gc_candidate_object_count
.observe(candidate_sst_number as _);
// 1. filter by watermark
let object_ids = object_ids
.into_iter()
.filter(|s| *s < watermark)
.collect_vec();
// 2. filter by version
let number = self
let selected_sst_number = self
.hummock_manager
.extend_objects_to_delete_from_scan(&object_ids)
.await;
metrics
.full_gc_selected_object_count
.observe(selected_sst_number as _);
tracing::info!("GC watermark is {}. SST full scan returns {} SSTs. {} remains after filtered by GC watermark. {} remains after filtered by hummock version.",
watermark, sst_number, object_ids.len(), number);
Ok(number)
watermark, candidate_sst_number, object_ids.len(), selected_sst_number);
Ok(selected_sst_number)
}
}

Expand Down
42 changes: 42 additions & 0 deletions src/meta/src/rpc/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ pub struct MetaMetrics {
pub min_safepoint_version_id: IntGauge,
/// Compaction groups that are in write stop state.
pub write_stop_compaction_groups: IntGaugeVec,
/// The object id watermark used in last full GC.
pub full_gc_last_object_id_watermark: IntGauge,
/// The number of attempts to trigger full GC.
pub full_gc_trigger_count: IntGauge,
/// The number of candidate objects to delete after scanning object store.
pub full_gc_candidate_object_count: Histogram,
/// The number of objects to delete after filtering by meta node.
pub full_gc_selected_object_count: Histogram,
/// Hummock version stats
pub version_stats: IntGaugeVec,
/// Total number of objects that are no longer referenced by versions.
Expand Down Expand Up @@ -273,6 +281,36 @@ impl MetaMetrics {
)
.unwrap();

let full_gc_last_object_id_watermark = register_int_gauge_with_registry!(
"storage_full_gc_last_object_id_watermark",
"the object id watermark used in last full GC",
registry
)
.unwrap();

let full_gc_trigger_count = register_int_gauge_with_registry!(
"storage_full_gc_trigger_count",
"the number of attempts to trigger full GC",
registry
)
.unwrap();

let opts = histogram_opts!(
"storage_full_gc_candidate_object_count",
"the number of candidate object to delete after scanning object store",
exponential_buckets(1.0, 10.0, 6).unwrap()
);
let full_gc_candidate_object_count =
register_histogram_with_registry!(opts, registry).unwrap();

let opts = histogram_opts!(
"storage_full_gc_selected_object_count",
"the number of object to delete after filtering by meta node",
exponential_buckets(1.0, 10.0, 6).unwrap()
);
let full_gc_selected_object_count =
register_histogram_with_registry!(opts, registry).unwrap();

let min_safepoint_version_id = register_int_gauge_with_registry!(
"storage_min_safepoint_version_id",
"min safepoint version id",
Expand Down Expand Up @@ -511,6 +549,10 @@ impl MetaMetrics {
min_pinned_version_id,
min_safepoint_version_id,
write_stop_compaction_groups,
full_gc_last_object_id_watermark,
full_gc_trigger_count,
full_gc_candidate_object_count,
full_gc_selected_object_count,
hummock_manager_lock_time,
hummock_manager_real_process_time,
time_after_last_observation: AtomicU64::new(0),
Expand Down

0 comments on commit d95d3a2

Please sign in to comment.