diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_seq_sort.rs b/packages_rs/nextclade-cli/src/cli/nextclade_seq_sort.rs index 8b5c99aef..32648c976 100644 --- a/packages_rs/nextclade-cli/src/cli/nextclade_seq_sort.rs +++ b/packages_rs/nextclade-cli/src/cli/nextclade_seq_sort.rs @@ -3,15 +3,18 @@ use crate::dataset::dataset_download::download_datasets_index_json; use crate::io::http_client::HttpClient; use eyre::{Report, WrapErr}; use itertools::Itertools; -use log::{info, trace, LevelFilter}; +use log::{info, LevelFilter}; use nextclade::io::fasta::{FastaReader, FastaRecord, FastaWriter}; +use nextclade::io::fs::path_to_string; use nextclade::make_error; use nextclade::sort::minimizer_index::{MinimizerIndexJson, MINIMIZER_INDEX_ALGO_VERSION}; use nextclade::sort::minimizer_search::{run_minimizer_search, MinimizerSearchRecord}; +use nextclade::utils::option::OptionMapRefFallible; use nextclade::utils::string::truncate; use serde::Serialize; +use std::collections::btree_map::Entry::{Occupied, Vacant}; use std::collections::BTreeMap; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::str::FromStr; use tinytemplate::TinyTemplate; @@ -112,9 +115,7 @@ pub fn run(args: &NextcladeSortArgs, minimizer_index: &MinimizerIndexJson) -> Re }) .unwrap(); - if result.max_score >= search_params.min_score - && result.total_hits >= search_params.min_hits - { + if result.max_score >= search_params.min_score && result.total_hits >= search_params.min_hits { result_sender .send(MinimizerSearchRecord { fasta_record, result }) .wrap_err("When sending minimizer record into the channel") @@ -129,72 +130,98 @@ pub fn run(args: &NextcladeSortArgs, minimizer_index: &MinimizerIndexJson) -> Re let writer = s.spawn(move || { let output_dir = &output_dir; let output = &output; + writer_thread(output, output_dir, result_receiver).unwrap(); + }); + }); - let tt = output.as_ref().map(move |output| { - let mut tt = TinyTemplate::new(); - tt.add_template("output", output) - .wrap_err_with(|| format!("When parsing template: {output}")) - .unwrap(); - tt - }); - - println!( - "{:40} | {:40} | {:10} | {:10}", - "Seq. name", "dataset", "total hits", "max hit" - ); - - let mut writers = BTreeMap::new(); - - for record in result_receiver { - let dataset = record.result.datasets.first(); - - if let Some(dataset) = dataset { - let name = &dataset.name; - - let filepath = match (&tt, output_dir) { - (Some(tt), None) => { - let filepath_str = tt - .render("output", &OutputTemplateContext { name }) - .wrap_err("When rendering output path template") - .unwrap(); + Ok(()) +} - Some( - PathBuf::from_str(&filepath_str) - .wrap_err_with(|| format!("Invalid output translations path: '{filepath_str}'")) - .unwrap(), - ) - } - (None, Some(output_dir)) => Some(output_dir.join(name).join("sequences.fasta")), - _ => None, - }; - - if let Some(filepath) = filepath { - let writer = writers.entry(filepath.clone()).or_insert_with(|| { - trace!("Creating fasta writer to file {filepath:#?}"); - FastaWriter::from_path(filepath).unwrap() - }); - - writer - .write(&record.fasta_record.seq_name, &record.fasta_record.seq, false) - .unwrap(); - } +fn writer_thread( + output: &Option, + output_dir: &Option, + result_receiver: crossbeam_channel::Receiver, +) -> Result<(), Report> { + let template = output.map_ref_fallible(move |output| -> Result { + let mut template = TinyTemplate::new(); + template + .add_template("output", output) + .wrap_err_with(|| format!("When parsing template: {output}"))?; + Ok(template) + })?; + + println!( + "{:40} | {:40} | {:10} | {:10}", + "Seq. name", "dataset", "total hits", "max hit" + ); + + let mut writers = BTreeMap::new(); + + for record in result_receiver { + for dataset in &record.result.datasets { + let name = &dataset.name; + + let names = name + .split('/') + .scan(PathBuf::new(), |name, component| { + *name = name.join(component); + Some(name.clone()) + }) + .map(path_to_string) + .collect::, Report>>()?; + + for name in names { + let filepath = get_filepath(&name, &template, output_dir)?; + + if let Some(filepath) = filepath { + let writer = get_or_insert_writer(&mut writers, filepath)?; + writer.write(&record.fasta_record.seq_name, &record.fasta_record.seq, false)?; } - - let name_or_empty = dataset.as_ref().map(|dataset| dataset.name.clone()).unwrap_or_default(); - println!( - "{:40} | {:40} | {:>10} | {:>.3}", - &truncate(record.fasta_record.seq_name, 40), - &truncate(name_or_empty, 40), - &record.result.total_hits, - &record.result.max_score - ); } - }); - }); + } + + let dataset = record.result.datasets.first(); + let name_or_empty = dataset.as_ref().map(|dataset| dataset.name.clone()).unwrap_or_default(); + println!( + "{:40} | {:40} | {:>10} | {:>.3}", + &truncate(record.fasta_record.seq_name, 40), + &truncate(name_or_empty, 40), + &record.result.total_hits, + &record.result.max_score + ); + } Ok(()) } +fn get_or_insert_writer( + writers: &mut BTreeMap, + filepath: impl AsRef, +) -> Result<&mut FastaWriter, Report> { + Ok(match writers.entry(filepath.as_ref().to_owned()) { + Occupied(e) => e.into_mut(), + Vacant(e) => e.insert(FastaWriter::from_path(filepath)?), + }) +} + +fn get_filepath( + name: &str, + tt: &Option, + output_dir: &Option, +) -> Result, Report> { + Ok(match (&tt, output_dir) { + (Some(tt), None) => { + let filepath_str = tt + .render("output", &OutputTemplateContext { name }) + .wrap_err("When rendering output path template")?; + + Some(PathBuf::from_str(&filepath_str).wrap_err_with(|| format!("Invalid output path: '{filepath_str}'"))?) + } + (None, Some(output_dir)) => Some(output_dir.join(name).join("sequences.fasta")), + _ => None, + }) +} + #[derive(Serialize)] struct OutputTemplateContext<'a> { name: &'a str, diff --git a/packages_rs/nextclade/benches/bench_create_stripes.rs b/packages_rs/nextclade/benches/bench_create_stripes.rs index e10348d55..29fae0e7f 100644 --- a/packages_rs/nextclade/benches/bench_create_stripes.rs +++ b/packages_rs/nextclade/benches/bench_create_stripes.rs @@ -22,9 +22,7 @@ pub fn bench_create_stripes(c: &mut Criterion) { let excess_bandwidth = black_box(2); let qry_len = black_box(30); let ref_len = black_box(40); - let max_indel = black_box(400); - let allowed_mismatches = black_box(2); - let max_band_area = black_box(500_000_000); + let minimal_bandwidth = black_box(1); let mut group = c.benchmark_group("create_stripes"); group.throughput(Throughput::Bytes(qry_len as u64)); @@ -36,8 +34,7 @@ pub fn bench_create_stripes(c: &mut Criterion) { ref_len, terminal_bandwidth, excess_bandwidth, - allowed_mismatches, - max_band_area, + minimal_bandwidth, ) }); }); diff --git a/packages_rs/nextclade/src/io/fs.rs b/packages_rs/nextclade/src/io/fs.rs index 4120049a9..6c2aaff32 100644 --- a/packages_rs/nextclade/src/io/fs.rs +++ b/packages_rs/nextclade/src/io/fs.rs @@ -89,3 +89,11 @@ pub fn read_reader_to_string(reader: impl Read) -> Result { reader.read_to_string(&mut data)?; Ok(data) } + +pub fn path_to_string(p: impl AsRef) -> Result { + p.as_ref() + .as_os_str() + .to_str() + .map(ToOwned::to_owned) + .ok_or_else(|| eyre!("Unable to convert path to string: {:#?}", p.as_ref())) +}