diff --git a/CHANGELOG.md b/CHANGELOG.md index 4aa0e753fe9a..8beaa28d5135 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * Add distribution support (running completion / chat model on different process / machine). * Add conversation history in chat playground. +* Add `/v1/metrics` endpoint for Prometheus metrics collection. ## Fixes and Improvements diff --git a/Cargo.lock b/Cargo.lock index 1bf5e4afcd56..ecbe2ef20991 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -486,6 +486,29 @@ dependencies = [ "tower-service", ] +[[package]] +name = "axum-prometheus" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97def327c5481791abb57ac295bfc70f2e1a0727675b7dbf74bd1b27a72b6fd8" +dependencies = [ + "axum", + "axum-core", + "bytes", + "futures", + "futures-core", + "http", + "http-body", + "matchit", + "metrics", + "metrics-exporter-prometheus", + "once_cell", + "pin-project", + "tokio", + "tower", + "tower-http 0.4.0", +] + [[package]] name = "axum-streams" version = "0.9.1" @@ -1789,6 +1812,15 @@ dependencies = [ "ahash 0.7.7", ] +[[package]] +name = "hashbrown" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038" +dependencies = [ + "ahash 0.8.3", +] + [[package]] name = "hashbrown" version = "0.14.0" @@ -2433,6 +2465,15 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8" +[[package]] +name = "mach2" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8" +dependencies = [ + "libc", +] + [[package]] name = "matchers" version = "0.0.1" @@ -2534,6 +2575,61 @@ dependencies = [ "autocfg", ] +[[package]] +name = "metrics" +version = "0.21.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fde3af1a009ed76a778cb84fdef9e7dbbdf5775ae3e4cc1f434a6a307f6f76c5" +dependencies = [ + "ahash 0.8.3", + "metrics-macros", + "portable-atomic", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a4964177ddfdab1e3a2b37aec7cf320e14169abb0ed73999f558136409178d5" +dependencies = [ + "base64 0.21.2", + "hyper", + "indexmap 1.9.3", + "ipnet", + "metrics", + "metrics-util", + "quanta", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "metrics-macros" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.28", +] + +[[package]] +name = "metrics-util" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4de2ed6e491ed114b40b732e4d1659a9d53992ebd87490c44a6ffe23739d973e" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown 0.13.1", + "metrics", + "num_cpus", + "quanta", + "sketches-ddsketch", +] + [[package]] name = "mime" version = "0.3.17" @@ -3293,6 +3389,22 @@ dependencies = [ "checked_int_cast", ] +[[package]] +name = "quanta" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab" +dependencies = [ + "crossbeam-utils", + "libc", + "mach2", + "once_cell", + "raw-cpuid", + "wasi 0.11.0+wasi-snapshot-preview1", + "web-sys", + "winapi", +] + [[package]] name = "question" version = "0.2.2" @@ -3398,6 +3510,15 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies 
= [ + "bitflags 1.3.2", +] + [[package]] name = "rayon" version = "1.7.0" @@ -4424,6 +4545,7 @@ dependencies = [ "async-stream", "async-trait", "axum", + "axum-prometheus", "axum-streams", "axum-tracing-opentelemetry", "chrono", diff --git a/Makefile b/Makefile index 3447eeff423b..468d38b3e1bb 100644 --- a/Makefile +++ b/Makefile @@ -24,8 +24,9 @@ bump-release-version: cargo ws version --allow-branch "r*" --no-individual-tags --force "*" update-openapi-doc: - curl http://localhost:8080/api-docs/openapi.json | jq ' \ - delpaths([ \ + curl http://localhost:8080/api-docs/openapi.json | jq ' \ + delpaths([ \ + ["paths", "/v1/metrics"], \ ["paths", "/v1beta/chat/completions"], \ ["paths", "/v1beta/search"], \ ["components", "schemas", "CompletionRequest", "properties", "prompt"], \ @@ -37,4 +38,4 @@ update-openapi-doc: > website/static/openapi.json update-graphql-schema: - cargo run --package tabby-webserver --example update-schema \ No newline at end of file + cargo run --package tabby-webserver --example update-schema diff --git a/crates/tabby/Cargo.toml b/crates/tabby/Cargo.toml index e64e11deb5dd..2c804e36f487 100644 --- a/crates/tabby/Cargo.toml +++ b/crates/tabby/Cargo.toml @@ -49,6 +49,7 @@ async-trait.workspace = true tabby-webserver = { path = "../../ee/tabby-webserver", optional = true } thiserror.workspace = true chrono = "0.4.31" +axum-prometheus = "0.4.0" [dependencies.uuid] version = "1.3.3" diff --git a/crates/tabby/src/routes/metrics.rs b/crates/tabby/src/routes/metrics.rs new file mode 100644 index 000000000000..9aaa9cbacdfc --- /dev/null +++ b/crates/tabby/src/routes/metrics.rs @@ -0,0 +1,16 @@ +use std::sync::Arc; + +use axum::extract::State; +use axum_prometheus::metrics_exporter_prometheus::PrometheusHandle; + +#[utoipa::path( + get, + path = "/v1/metrics", + tag = "v1", + responses( + (status = 200, description = "Success", body = String, content_type = "text/plain"), + ) +)] +pub async fn metrics(State(state): State>) -> String { + state.render() 
+} diff --git a/crates/tabby/src/routes/mod.rs b/crates/tabby/src/routes/mod.rs index 2b52910c9c0b..e46396e72c93 100644 --- a/crates/tabby/src/routes/mod.rs +++ b/crates/tabby/src/routes/mod.rs @@ -2,10 +2,12 @@ mod chat; mod completions; mod events; mod health; +mod metrics; mod search; pub use chat::*; pub use completions::*; pub use events::*; pub use health::*; +pub use metrics::*; pub use search::*; diff --git a/crates/tabby/src/serve.rs b/crates/tabby/src/serve.rs index 2668f5a74756..a653e6daeeef 100644 --- a/crates/tabby/src/serve.rs +++ b/crates/tabby/src/serve.rs @@ -5,6 +5,7 @@ use std::{ }; use axum::{routing, Router, Server}; +use axum_prometheus::{metrics_exporter_prometheus::PrometheusHandle, PrometheusMetricLayer}; use axum_tracing_opentelemetry::opentelemetry_tracing_layer; use clap::Args; use tabby_common::{ @@ -49,7 +50,7 @@ Install following IDE / Editor extensions to get started with [Tabby](https://gi servers( (url = "/", description = "Server"), ), - paths(routes::log_event, routes::completions, routes::completions, routes::health, routes::search), + paths(routes::log_event, routes::completions, routes::completions, routes::health, routes::search, routes::metrics), components(schemas( api::event::LogEventRequest, completion::CompletionRequest, @@ -108,9 +109,11 @@ pub async fn main(config: &Config, args: &ServeArgs) { let logger = Arc::new(create_logger()); let code = Arc::new(create_code_search()); + let (prometheus_layer, prometheus_handle) = PrometheusMetricLayer::pair(); + let metrics_handle = Arc::new(prometheus_handle); let app = Router::new() - .merge(api_router(args, config, logger.clone(), code.clone()).await) + .merge(api_router(args, config, logger.clone(), code.clone(), metrics_handle).await) .merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", ApiDoc::openapi())); #[cfg(feature = "ee")] @@ -121,7 +124,8 @@ pub async fn main(config: &Config, args: &ServeArgs) { let app = app .layer(CorsLayer::permissive()) - 
.layer(opentelemetry_tracing_layer()); + .layer(opentelemetry_tracing_layer()) + .layer(prometheus_layer); let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port)); info!("Listening at {}", address); @@ -148,6 +152,7 @@ async fn api_router( config: &Config, logger: Arc, code: Arc, + metrics_handle: Arc, ) -> Router { let completion_state = if let Some(model) = &args.model { Some(Arc::new( @@ -179,6 +184,7 @@ async fn api_router( args.chat_model.as_deref(), &args.device, )); + routers.push({ Router::new() .route( @@ -193,6 +199,10 @@ async fn api_router( "/v1/health", routing::get(routes::health).with_state(health_state), ) + .route( + "/v1/metrics", + routing::get(routes::metrics).with_state(metrics_handle), + ) }); if let Some(completion_state) = completion_state { diff --git a/crates/tabby/src/worker.rs b/crates/tabby/src/worker.rs index f874a21912eb..77caa3d9b85c 100644 --- a/crates/tabby/src/worker.rs +++ b/crates/tabby/src/worker.rs @@ -6,6 +6,7 @@ use std::{ use anyhow::Result; use axum::{routing, Router}; +use axum_prometheus::PrometheusMetricLayer; use axum_tracing_opentelemetry::opentelemetry_tracing_layer; use clap::Args; use hyper::Server; @@ -84,14 +85,22 @@ pub async fn main(kind: WorkerKind, args: &WorkerArgs) { info!("Starting worker, this might takes a few minutes..."); let context = WorkerContext::new(&args.url).await; + + let (prometheus_layer, prometheus_handle) = PrometheusMetricLayer::pair(); + let app = match kind { WorkerKind::Completion => make_completion_route(context, args).await, WorkerKind::Chat => make_chat_route(context, args).await, }; let app = app + .route( + "/v1/metrics", + routing::get(routes::metrics).with_state(Arc::new(prometheus_handle)), + ) .layer(CorsLayer::permissive()) - .layer(opentelemetry_tracing_layer()); + .layer(opentelemetry_tracing_layer()) + .layer(prometheus_layer); let address = SocketAddr::from((Ipv4Addr::UNSPECIFIED, args.port)); info!("Listening at {}", address);