Skip to content

Commit

Permalink
chore: add more metrics for region migration (#4838)
Browse files Browse the repository at this point in the history
  • Loading branch information
WenyXu authored Oct 16, 2024
1 parent c231eee commit 0ce93f0
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 1 deletion.
15 changes: 14 additions & 1 deletion src/meta-srv/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,20 @@ lazy_static! {
/// Counter of meta kv cache misses, labeled by `op`.
pub static ref METRIC_META_KV_CACHE_MISS: IntCounterVec = register_int_counter_vec!(
    "greptime_meta_kv_cache_miss",
    "meta kv cache miss",
    &["op"]
)
.unwrap();
// Heartbeat received by metasrv.
/// Counter of heartbeats received by metasrv, labeled by `pusher_key`.
pub static ref METRIC_META_HEARTBEAT_RECV: IntCounterVec = register_int_counter_vec!(
    "greptime_meta_heartbeat_recv",
    "heartbeats received by metasrv",
    &["pusher_key"]
)
.unwrap();
/// Histogram for region migration execution, labeled by `state`.
///
/// The region migration procedure starts a timer on this histogram, using the
/// current state's short name as the `state` label, before advancing the state.
pub static ref METRIC_META_REGION_MIGRATION_EXECUTE: HistogramVec = register_histogram_vec!(
    "greptime_meta_region_migration_execute",
    "meta region migration execute",
    &["state"]
)
.unwrap();
/// Counter of errors returned while advancing a region migration state.
///
/// Labels: `state` is the short name of the failing state; `error_type` is
/// `"retryable"` or `"external"`, depending on whether the error is retryable.
pub static ref METRIC_META_REGION_MIGRATION_ERROR: IntCounterVec = register_int_counter_vec!(
    "greptime_meta_region_migration_error",
    "meta region migration abort",
    &["state", "error_type"]
)
.unwrap();
/// Counter of region migration tasks per participating datanode.
///
/// Labels: `datanode_type` tells whether the datanode is the source or the
/// destination peer of the task; `datanode_id` is that peer's id.
pub static ref METRIC_META_REGION_MIGRATION_DATANODES: IntCounterVec = register_int_counter_vec!(
    "greptime_meta_region_migration_stat",
    "meta region migration stat",
    &["datanode_type", "datanode_id"]
)
.unwrap();
/// Counter of region migration procedures that failed.
///
/// Incremented when waiting on a submitted region migration procedure returns an error.
pub static ref METRIC_META_REGION_MIGRATION_FAIL: IntCounter = register_int_counter!(
    "greptime_meta_region_migration_fail",
    "meta region migration fail"
)
.unwrap();

}
17 changes: 17 additions & 0 deletions src/meta-srv/src/procedure/region_migration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ use tokio::time::Instant;

use self::migration_start::RegionMigrationStart;
use crate::error::{self, Result};
use crate::metrics::{METRIC_META_REGION_MIGRATION_ERROR, METRIC_META_REGION_MIGRATION_EXECUTE};
use crate::service::mailbox::MailboxRef;

/// It's shared in each step and available even after recovering.
Expand Down Expand Up @@ -390,6 +391,12 @@ impl Context {
#[async_trait::async_trait]
#[typetag::serde(tag = "region_migration_state")]
pub(crate) trait State: Sync + Send + Debug {
/// Returns the short name of the implementing state type.
///
/// The short name is the final `::`-separated segment of
/// [`std::any::type_name`] for `Self`.
fn name(&self) -> &'static str {
    let full_path = std::any::type_name::<Self>();
    match full_path.split("::").last() {
        Some(short) => short,
        // `split` always yields at least one segment; keep the full path as a fallback.
        None => full_path,
    }
}

/// Yields the next [State] and [Status].
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)>;

Expand Down Expand Up @@ -478,10 +485,20 @@ impl Procedure for RegionMigrationProcedure {
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
let state = &mut self.state;

let name = state.name();
let _timer = METRIC_META_REGION_MIGRATION_EXECUTE
.with_label_values(&[name])
.start_timer();
let (next, status) = state.next(&mut self.context).await.map_err(|e| {
if e.is_retryable() {
METRIC_META_REGION_MIGRATION_ERROR
.with_label_values(&[name, "retryable"])
.inc();
ProcedureError::retry_later(e)
} else {
METRIC_META_REGION_MIGRATION_ERROR
.with_label_values(&[name, "external"])
.inc();
ProcedureError::external(e)
}
})?;
Expand Down
8 changes: 8 additions & 0 deletions src/meta-srv/src/procedure/region_migration/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use store_api::storage::RegionId;
use table::table_name::TableName;

use crate::error::{self, Result};
use crate::metrics::{METRIC_META_REGION_MIGRATION_DATANODES, METRIC_META_REGION_MIGRATION_FAIL};
use crate::procedure::region_migration::{
DefaultContextFactory, PersistentContext, RegionMigrationProcedure,
};
Expand Down Expand Up @@ -323,6 +324,12 @@ impl RegionMigrationManager {
schema_name,
..
} = table_info.table_name();
METRIC_META_REGION_MIGRATION_DATANODES
.with_label_values(&["src", &task.from_peer.id.to_string()])
.inc();
METRIC_META_REGION_MIGRATION_DATANODES
.with_label_values(&["desc", &task.to_peer.id.to_string()])
.inc();
let RegionMigrationProcedureTask {
cluster_id,
region_id,
Expand Down Expand Up @@ -358,6 +365,7 @@ impl RegionMigrationManager {

if let Err(e) = watcher::wait(watcher).await {
error!(e; "Failed to wait region migration procedure {procedure_id} for {task}");
METRIC_META_REGION_MIGRATION_FAIL.inc();
return;
}

Expand Down

0 comments on commit 0ce93f0

Please sign in to comment.