Skip to content

Commit

Permalink
feat: tolerate failure to load some rag files
Browse files Browse the repository at this point in the history
  • Loading branch information
sigoden committed Sep 8, 2024
1 parent f4a48e6 commit 3ad78cb
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 33 deletions.
45 changes: 42 additions & 3 deletions src/rag/loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,38 @@ use std::collections::HashMap;
pub const EXTENSION_METADATA: &str = "__extension__";
pub const PATH_METADATA: &str = "__path__";

pub async fn load_document(
loaders: &HashMap<String, String>,
path: &str,
has_error: &mut bool,
) -> (String, Vec<(String, RagMetadata)>) {
let mut maybe_error = None;
let mut files = vec![];
if is_url(path) {
if let Some(path) = path.strip_suffix("**") {
match load_recursive_url(loaders, path).await {
Ok(v) => files.extend(v),
Err(err) => maybe_error = Some(err),
}
} else {
match load_url(loaders, path).await {
Ok(v) => files.push(v),
Err(err) => maybe_error = Some(err),
}
}
} else {
match load_path(loaders, path, has_error).await {
Ok(v) => files.extend(v),
Err(err) => maybe_error = Some(err),
}
}
if let Some(err) = maybe_error {
*has_error = true;
println!("{}", warning_text(&format!("⚠️ {err:?}")));
}
(path.to_string(), files)
}

pub async fn load_recursive_url(
loaders: &HashMap<String, String>,
path: &str,
Expand Down Expand Up @@ -37,7 +69,9 @@ pub async fn load_recursive_url(
pub async fn load_path(
loaders: &HashMap<String, String>,
path: &str,
has_error: &mut bool,
) -> Result<Vec<(String, RagMetadata)>> {
let path = Path::new(path).absolutize()?.display().to_string();
let file_paths = expand_glob_paths(&[path]).await?;
let mut output = vec![];
let file_paths_len = file_paths.len();
Expand All @@ -46,10 +80,15 @@ pub async fn load_path(
1 => output.push(load_file(loaders, &file_paths[0]).await?),
_ => {
for path in file_paths {
println!("🚀 Loading file {path}");
output.push(load_file(loaders, &path).await?)
println!("Load {path}");
match load_file(loaders, &path).await {
Ok(v) => output.push(v),
Err(err) => {
*has_error = true;
println!("{}", warning_text(&format!("Error: {err:?}")));
}
}
}
println!("✨ Load directory completed");
}
}
Ok(output)
Expand Down
33 changes: 3 additions & 30 deletions src/rag/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,16 +275,9 @@ impl Rag {
for (index, path) in paths.iter().enumerate() {
let path = path.as_ref();
println!("Load {path} [{}/{paths_len}]", index + 1);
match load_document(&loaders, path).await {
Ok((path, document_files)) => {
files.extend(document_files);
document_paths.push(path);
}
Err(err) => {
has_error = true;
println!("{}", warning_text(&format!("Error: {err:?}")));
}
}
let (path, document_files) = load_document(&loaders, path, &mut has_error).await;
files.extend(document_files);
document_paths.push(path);
}

if has_error {
Expand Down Expand Up @@ -729,26 +722,6 @@ fn add_documents() -> Result<Vec<String>> {
Ok(paths)
}

async fn load_document(
loaders: &HashMap<String, String>,
path: &str,
) -> Result<(String, Vec<(String, RagMetadata)>)> {
let mut files = vec![];
if is_url(path) {
if let Some(path) = path.strip_suffix("**") {
files.extend(load_recursive_url(loaders, path).await?);
} else {
files.push(load_url(loaders, path).await?);
}
Ok((path.to_string(), files))
} else {
let path = Path::new(path);
let path = path.absolutize()?.display().to_string();
files.extend(load_path(loaders, &path).await?);
Ok((path.to_string(), files))
}
}

fn progress(spinner: &Option<Spinner>, message: String) {
if let Some(spinner) = spinner {
let _ = spinner.set_message(message);
Expand Down

0 comments on commit 3ad78cb

Please sign in to comment.