Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add retry logic to embedding/rerank api calls #879

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 41 additions & 6 deletions src/rag/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ use parking_lot::RwLock;
use path_absolutize::Absolutize;
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::{collections::HashMap, fmt::Debug, fs, path::Path};
use std::{collections::HashMap, fmt::Debug, fs, path::Path, time::Duration};
use tokio::time::sleep;

/// Maximum number of attempts (the first call included) made for one
/// embeddings request before the retry loop gives up and returns the error.
const EMBEDDING_RETRY_LIMIT: usize = 3;
/// Maximum number of attempts (the first call included) made for one
/// rerank request before the retry loop gives up and returns the error.
const RERANK_RETRY_LIMIT: usize = 2;

pub struct Rag {
config: GlobalConfig,
Expand Down Expand Up @@ -483,7 +487,23 @@ impl Rag {
}
}
let data = RerankData::new(query.to_string(), documents, top_k);
let list = client.rerank(&data).await?;
let mut retry = 0;
let list = loop {
retry += 1;
match client.rerank(&data).await {
Ok(result) => break result,
Err(e) if retry < RERANK_RETRY_LIMIT => {
debug!("retry {} failed: {}", retry, e);
sleep(Duration::from_secs(retry as _)).await;
continue;
}
Err(e) => {
return Err(e).with_context(|| {
format!("Failed to rerank after {RERANK_RETRY_LIMIT} attempts")
})?
}
}
};
let ids: Vec<_> = list
.into_iter()
.take(top_k)
Expand Down Expand Up @@ -587,10 +607,25 @@ impl Rag {
texts: texts.to_vec(),
query,
};
let chunk_output = embedding_client
.embeddings(&chunk_data)
.await
.context("Failed to create embedding")?;
let mut retry = 0;
let chunk_output = loop {
retry += 1;
match embedding_client.embeddings(&chunk_data).await {
Ok(v) => break v,
Err(e) if retry < EMBEDDING_RETRY_LIMIT => {
debug!("retry {} failed: {}", retry, e);
sleep(Duration::from_secs(retry as _)).await;
continue;
}
Err(e) => {
return Err(e).with_context(|| {
format!(
"Failed to create embedding after {EMBEDDING_RETRY_LIMIT} attempts"
)
})?
}
}
};
output.extend(chunk_output);
}
Ok(output)
Expand Down