Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Plumb and log timings #20

Merged
merged 1 commit into from
Dec 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 57 additions & 31 deletions src/bin/blue_candle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use axum::{
use blue_candle::{
api::{Prediction, VisionDetectionRequest, VisionDetectionResponse},
coco_classes,
detector::{Bbox, Detector, KeyPoint, BIKE_IMAGE_BYTES},
detector::{Bbox, Detector, InferenceTime, KeyPoint, ProcessingTime, BIKE_IMAGE_BYTES},
utils::{download_models, ensure_directory_exists, img_with_bbox, read_jpeg_file, save_image},
};
use candle::utils::cuda_is_available;
Expand Down Expand Up @@ -131,7 +131,11 @@ async fn main() -> anyhow::Result<()> {
}

async fn run_server(args: Args, detector: Detector) -> anyhow::Result<()> {
detector.test_detection()?;
let (_, inference_time, processing_time) = detector.test_detection()?;
info!(
"Server inference startup test, processing time: {:#?}, inference time: {:#?}",
processing_time, inference_time
);

let detector = Arc::new(detector);

Expand All @@ -149,10 +153,10 @@ async fn run_server(args: Args, detector: Detector) -> anyhow::Result<()> {
}

async fn v1_vision_detection(
State(state): State<Arc<Detector>>,
State(detector): State<Arc<Detector>>,
mut multipart: Multipart, // Note multipart needs to be last
) -> Result<Json<VisionDetectionResponse>, BlueCandleError> {
let process_start = Instant::now();
let request_start_time = Instant::now();
let mut vision_request = VisionDetectionRequest::default();

while let Some(field) = multipart.next_field().await? {
Expand All @@ -164,35 +168,36 @@ async fn v1_vision_detection(
if let Some(image_name) = field.file_name().map(|s| s.to_string()) {
vision_request.image_name = image_name;
}
vision_request.image_data = field.bytes().await.unwrap();
//vision_request.image_data = field.bytes().await?;
vision_request.image_data = field.bytes().await?;
}
Some(&_) => {}
None => {}
}
}

let image_ref = vision_request.image_data.clone();
let state2 = state.clone();
let state2 = detector.clone();
// Detection will be slow, (100ms+) so we spawn a blocking task.
let predictions = tokio::task::spawn_blocking(move || -> anyhow::Result<Vec<Prediction>> {
let reader = Reader::new(Cursor::new(image_ref.as_ref()))
.with_guessed_format()
.expect("Cursor io never fails");
let bboxes = state2.detect(reader)?;

let predictions = from_bbox_to_predictions(
bboxes,
vision_request.min_confidence,
&coco_classes::NAMES,
state2.labels(),
);
Ok(predictions)
})
let (predictions, inference_time, processing_time) = tokio::task::spawn_blocking(
move || -> anyhow::Result<(Vec<Prediction>, InferenceTime, ProcessingTime)> {
let reader = Reader::new(Cursor::new(image_ref.as_ref()))
.with_guessed_format()
.expect("Cursor io never fails");
let (bboxes, inference_time, processing_time) = state2.detect(reader)?;

let predictions = from_bbox_to_predictions(
bboxes,
vision_request.min_confidence,
&coco_classes::NAMES,
state2.labels(),
);
Ok((predictions, inference_time, processing_time))
},
)
.await??;

if !predictions.is_empty() {
if let Some(image_path) = state.image_path() {
if let Some(image_path) = detector.image_path() {
let reader = Reader::new(Cursor::new(vision_request.image_data.as_ref()))
.with_guessed_format()
.expect("Cursor io never fails");
Expand All @@ -203,10 +208,14 @@ async fn v1_vision_detection(
}
}

let process_time = Instant::now().duration_since(process_start);

let request_time = Instant::now().duration_since(request_start_time);
let count = predictions.len() as i32;

info!(
"Request time {:#?}, processing time: {:#?}, inference time: {:#?}",
request_time, processing_time, inference_time
);

let response = VisionDetectionResponse {
success: true,
message: "".into(),
Expand All @@ -215,12 +224,15 @@ async fn v1_vision_detection(
count,
command: "detect".into(),
module_id: "Yolo8".into(),
execution_provider: "TODO".into(),
execution_provider: if detector.is_gpu() {
"GPU".to_string()
} else {
"CPU".to_string()
},
can_useGPU: cuda_is_available(),
// TODO(xnorpx): measure different times
inference_ms: process_time.as_millis() as i32,
process_ms: process_time.as_millis() as i32,
analysis_round_trip_ms: process_time.as_millis() as i32,
inference_ms: inference_time.as_millis() as i32,
process_ms: processing_time.as_millis() as i32,
analysis_round_trip_ms: request_time.as_millis() as i32,
};
Ok(Json(response))
}
Expand Down Expand Up @@ -303,13 +315,14 @@ pub fn from_bbox_to_predictions(
}

async fn test_image(image: String, args: Args, detector: Detector) -> anyhow::Result<()> {
let start_test_time = Instant::now();
let contents = read_jpeg_file(image.clone()).await?;

let reader = Reader::new(Cursor::new(contents.as_ref()))
.with_guessed_format()
.expect("Cursor io never fails");

let bboxes = detector.detect(reader)?;
let (bboxes, inference_time, processing_time) = detector.detect(reader)?;

let predictions =
from_bbox_to_predictions(bboxes, 0.5, &coco_classes::NAMES, detector.labels());
Expand All @@ -320,12 +333,19 @@ async fn test_image(image: String, args: Args, detector: Detector) -> anyhow::Re
let img = img_with_bbox(predictions, reader, args.legend_size)?;

save_image(img, image, "-od").await?;
let test_time = Instant::now().duration_since(start_test_time);

info!(
"Tested image in {:#?}, processing time: {:#?}, inference time: {:#?}",
test_time, processing_time, inference_time
);

Ok(())
}

async fn test(detector: Detector, args: Args) -> anyhow::Result<()> {
let bboxes = detector.test_detection()?;
let start_test_time = Instant::now();
let (bboxes, inference_time, processing_time) = detector.test_detection()?;
let predictions = from_bbox_to_predictions(
bboxes,
args.confidence_threshold,
Expand All @@ -338,5 +358,11 @@ async fn test(detector: Detector, args: Args) -> anyhow::Result<()> {
let img = img_with_bbox(predictions.clone(), reader, 30)?;
// The api doesn't provide a source id or a source name so we just generate a uuid here.
save_image(img, "test.jpg".into(), "").await?;
let test_time = Instant::now().duration_since(start_test_time);

info!(
"Tested image in {:#?}, processing time: {:#?}, inference time: {:#?}",
test_time, processing_time, inference_time
);
Ok(())
}
68 changes: 43 additions & 25 deletions src/detector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ use candle::{
use candle_core as candle;
use candle_nn::{Module, VarBuilder};
use image::{io::Reader, ImageFormat};
use std::{io::Cursor, time::Instant};
use tracing::{debug, info};
use std::{
io::Cursor,
time::{Duration, Instant},
};
use tracing::info;

// For testing
pub static BIKE_IMAGE_BYTES: &[u8] = include_bytes!("../assets/crossing.jpg");
Expand All @@ -18,6 +21,8 @@ static DEFAULT_MODEL: &[u8] = include_bytes!("../models/yolov8n.safetensors");
static DEFAULT_MODEL_MULTIPLES: Multiples = Multiples::n();

pub type Bboxes = Vec<Vec<Bbox<Vec<KeyPoint>>>>;
pub type ProcessingTime = Duration;
pub type InferenceTime = Duration;

#[derive(Clone, Debug)]
pub struct Detector {
Expand All @@ -27,6 +32,7 @@ pub struct Detector {
nms_threshold: f32,
labels: Vec<String>,
image_path: Option<String>,
gpu: bool,
}

impl Detector {
Expand All @@ -38,17 +44,17 @@ impl Detector {
labels: Vec<String>,
image_path: Option<String>,
) -> anyhow::Result<Self> {
let device = if !force_cpu && cuda_is_available() {
let (device, gpu) = if !force_cpu && cuda_is_available() {
info!("Detector is initialized for GPU");
Device::new_cuda(0)?
(Device::new_cuda(0)?, true)
} else {
info!(
"Detector is initialized for CPU with mkl: {:?}, with avx: {:?} with f16c: {:?}",
has_mkl(),
with_avx(),
with_f16c()
);
Device::Cpu
(Device::Cpu, false)
};

let (vb, multiples) = if let Some(model) = model {
Expand All @@ -72,37 +78,44 @@ impl Detector {
device,
labels,
image_path,
gpu,
})
}

pub fn test_detection(&self) -> anyhow::Result<(Bboxes, f32, f32)> {
/// Reports whether this detector runs on a CUDA GPU.
///
/// The `gpu` flag is set during construction: `true` when a CUDA device
/// was created, `false` when the detector fell back to the CPU.
pub fn is_gpu(&self) -> bool {
self.gpu
}

pub fn test_detection(
&self,
) -> anyhow::Result<((Bboxes, f32, f32), InferenceTime, ProcessingTime)> {
info!("Test detection");
let start_detection_time = Instant::now();
let start_processing_time = Instant::now();
let reader = Reader::new(Cursor::new(BIKE_IMAGE_BYTES))
.with_guessed_format()
.expect("Cursor io never fails");
let bboxes = self.detect_inner(reader)?;
let (bboxes, inference_time) = self.detect_inner(reader)?;
if bboxes.0.is_empty() {
bail!("Detection failed");
}
info!(
"Detection succeeded in: {:#?}",
Instant::now().duration_since(start_detection_time)
);
Ok(bboxes)
let processing_time = Instant::now().duration_since(start_processing_time);
Ok((bboxes, inference_time, processing_time))
}

pub fn detect(&self, reader: Reader<Cursor<&[u8]>>) -> anyhow::Result<(Bboxes, f32, f32)> {
let start_detection_time = Instant::now();
let res = self.detect_inner(reader)?;
debug!(
"Detection succeeded in: {:#?}",
Instant::now().duration_since(start_detection_time)
);
Ok(res)
/// Runs object detection on a JPEG image.
///
/// Returns the detection result tuple from the inner pass (bounding boxes
/// plus the width/height scale factors), the inference time reported by
/// `detect_inner`, and the total processing time measured around the
/// entire call.
pub fn detect(
    &self,
    reader: Reader<Cursor<&[u8]>>,
) -> anyhow::Result<((Bboxes, f32, f32), InferenceTime, ProcessingTime)> {
    let started = Instant::now();
    let (bboxes, inference_time) = self.detect_inner(reader)?;
    Ok((bboxes, inference_time, started.elapsed()))
}

fn detect_inner(&self, reader: Reader<Cursor<&[u8]>>) -> anyhow::Result<(Bboxes, f32, f32)> {
fn detect_inner(
&self,
reader: Reader<Cursor<&[u8]>>,
) -> anyhow::Result<((Bboxes, f32, f32), InferenceTime)> {
assert_eq!(reader.format(), Some(ImageFormat::Jpeg));
let original_image = reader.decode().map_err(candle::Error::wrap)?;

Expand Down Expand Up @@ -134,12 +147,17 @@ impl Detector {
.permute((2, 0, 1))?
};
let image_t = (image_t.unsqueeze(0)?.to_dtype(DType::F32)? * (1. / 255.))?;
let start_inference_time = Instant::now();
let pred = self.model.forward(&image_t)?.squeeze(0)?;
let inference_duration = Instant::now().duration_since(start_inference_time);
let bboxes = from_tensor_to_bbox(&pred, self.confidence_threshold, self.nms_threshold)?;
Ok((
bboxes,
(w as f32 / width as f32),
(h as f32 / height as f32),
(
bboxes,
(w as f32 / width as f32),
(h as f32 / height as f32),
),
inference_duration,
))
}

Expand Down