Skip to content

Commit

Permalink
Refactor: Use to_writer/from_reader across the codebase (#3443)
Browse files Browse the repository at this point in the history
Mostly to limit assumption that a MinHash/Signature is a JSON file, so
we can control better for possible format changes. Since all calls go
thru `to_writer`/`from_reader` now we can change/support versioned
formats at these method boundaries.
  • Loading branch information
luizirber authored Dec 19, 2024
1 parent f4f5187 commit b69c960
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 55 deletions.
4 changes: 2 additions & 2 deletions src/core/benches/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ fn intersection(c: &mut Criterion) {
filename.push("../../tests/test-data/gather-abund/genome-s10.fa.gz.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let mut sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let mut sigs = Signature::from_reader(reader).expect("Loading error");
let mh = if let Sketch::MinHash(mh) = &sigs.swap_remove(0).sketches()[0] {
mh.clone()
} else {
Expand All @@ -24,7 +24,7 @@ fn intersection(c: &mut Criterion) {
filename.push("../../tests/test-data/gather-abund/genome-s11.fa.gz.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let mut sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let mut sigs = Signature::from_reader(reader).expect("Loading error");
let mh2 = if let Sketch::MinHash(mh) = &sigs.swap_remove(0).sketches()[0] {
mh.clone()
} else {
Expand Down
16 changes: 8 additions & 8 deletions src/core/src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ mod test {
filename.push("../../tests/test-data/47+63-multisig.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");
// create Selection object
let mut selection = Selection::default();
selection.set_scaled(2000);
Expand All @@ -293,7 +293,7 @@ mod test {
filename.push("../../tests/test-data/47+63-multisig.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");
// create Selection object
let mut selection = Selection::default();
selection.set_scaled(500);
Expand All @@ -314,7 +314,7 @@ mod test {
filename.push("../../tests/test-data/genome-s11.fa.gz.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");
assert_eq!(sigs.len(), 4);
// create Selection object
let mut selection = Selection::default();
Expand All @@ -336,7 +336,7 @@ mod test {
filename.push("../../tests/test-data/genome-s11.fa.gz.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");
let sigs_copy = sigs.clone();
assert_eq!(sigs.len(), 4);
// create Selection object
Expand Down Expand Up @@ -366,7 +366,7 @@ mod test {
filename.push("../../tests/test-data/47+63-multisig.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");
assert_eq!(sigs.len(), 6);
// create Selection object
let mut selection = Selection::default();
Expand All @@ -388,7 +388,7 @@ mod test {
filename.push("../../tests/test-data/genome-s11.fa.gz.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");
assert_eq!(sigs.len(), 4);
// load sigs into collection + select compatible signatures
let mut cl = Collection::from_sigs(sigs).unwrap();
Expand All @@ -413,7 +413,7 @@ mod test {
filename.push("../../tests/test-data/47+63-multisig.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");
// create Selection object
let mut selection = Selection::default();
selection.set_scaled(2000);
Expand Down Expand Up @@ -480,7 +480,7 @@ mod test {
.push("../../tests/test-data/prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig");
let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");
// create Selection object
let mut selection = Selection::default();
selection.set_moltype(HashFunctions::Murmur64Hp);
Expand Down
8 changes: 5 additions & 3 deletions src/core/src/ffi/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::sketch::Sketch;
use crate::ffi::cmd::compute::SourmashComputeParameters;
use crate::ffi::minhash::SourmashKmerMinHash;
use crate::ffi::utils::{ForeignObject, SourmashStr};
use crate::prelude::ToWriter;

pub struct SourmashSignature;

Expand Down Expand Up @@ -193,8 +194,9 @@ unsafe fn signature_eq(ptr: *const SourmashSignature, other: *const SourmashSign
ffi_fn! {
unsafe fn signature_save_json(ptr: *const SourmashSignature) -> Result<SourmashStr> {
let sig = SourmashSignature::as_rust(ptr);
let st = serde_json::to_string(sig)?;
Ok(SourmashStr::from_string(st))
let mut st: Vec<u8> = vec![];
sig.to_writer(&mut st)?;
Ok(SourmashStr::from_string(String::from_utf8_unchecked(st)))
}
}

Expand Down Expand Up @@ -248,7 +250,7 @@ unsafe fn signatures_save_buffer(ptr: *const *const SourmashSignature, size: usi
} else {
Box::new(&mut buffer)
};
serde_json::to_writer(&mut writer, &rsigs)?;
rsigs.to_writer(&mut writer)?;
}

let b = buffer.into_boxed_slice();
Expand Down
32 changes: 21 additions & 11 deletions src/core/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,16 @@ impl ToWriter for Signature {
}
}

impl ToWriter for Vec<&Signature> {
fn to_writer<W>(&self, writer: &mut W) -> Result<(), Error>
where
W: io::Write,
{
serde_json::to_writer(writer, &self)?;
Ok(())
}
}

impl Select for Signature {
fn select(mut self, selection: &Selection) -> Result<Self, Error> {
self.signatures.retain(|s| {
Expand Down Expand Up @@ -949,7 +959,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

assert_eq!(sigs.len(), 4);

Expand Down Expand Up @@ -1072,7 +1082,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

assert_eq!(sigs.len(), 1);

Expand All @@ -1088,7 +1098,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

assert_eq!(sigs.len(), 1);

Expand All @@ -1112,7 +1122,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

assert_eq!(sigs.len(), 1);

Expand All @@ -1137,7 +1147,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

assert_eq!(sigs.len(), 1);

Expand All @@ -1161,7 +1171,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

// create Selection object
let mut selection = Selection::default();
Expand All @@ -1187,7 +1197,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

// create Selection object
let mut selection = Selection::default();
Expand All @@ -1207,7 +1217,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

// create Selection object
let mut selection = Selection::default();
Expand All @@ -1227,7 +1237,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

// create Selection object
let mut selection = Selection::default();
Expand All @@ -1248,7 +1258,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

// create Selection object
let mut selection = Selection::default();
Expand All @@ -1266,7 +1276,7 @@ mod test {

let file = File::open(filename).unwrap();
let reader = BufReader::new(file);
let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
let sigs = Signature::from_reader(reader).expect("Loading error");

// create Selection object
let mut selection = Selection::default();
Expand Down
42 changes: 42 additions & 0 deletions src/core/src/sketch/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::cmp::Ordering;
use std::collections::{BTreeMap, BTreeSet};
use std::f64::consts::PI;
use std::fmt::Write;
use std::io;
use std::iter::Peekable;
use std::str;
use std::sync::Mutex;
Expand All @@ -13,6 +14,7 @@ use serde::{Deserialize, Serialize};
use typed_builder::TypedBuilder;

use crate::encodings::HashFunctions;
use crate::prelude::ToWriter;
use crate::signature::SigsTrait;
use crate::sketch::hyperloglog::HyperLogLog;
use crate::Error;
Expand Down Expand Up @@ -183,6 +185,16 @@ impl<'de> Deserialize<'de> for KmerMinHash {
}
}

impl ToWriter for KmerMinHash {
fn to_writer<W>(&self, writer: &mut W) -> Result<(), Error>
where
W: io::Write,
{
serde_json::to_writer(writer, &self)?;
Ok(())
}
}

impl KmerMinHash {
pub fn new(
scaled: ScaledType,
Expand Down Expand Up @@ -856,6 +868,16 @@ impl KmerMinHash {

Ok((abundances, total_abundance))
}

pub fn from_reader<R>(rdr: R) -> Result<KmerMinHash, Error>
where
R: std::io::Read,
{
let (rdr, _format) = niffler::get_reader(Box::new(rdr))?;

let mh: KmerMinHash = serde_json::from_reader(rdr)?;
Ok(mh)
}
}

impl SigsTrait for KmerMinHash {
Expand Down Expand Up @@ -1113,6 +1135,16 @@ impl<'de> Deserialize<'de> for KmerMinHashBTree {
}
}

impl ToWriter for KmerMinHashBTree {
fn to_writer<W>(&self, writer: &mut W) -> Result<(), Error>
where
W: io::Write,
{
serde_json::to_writer(writer, &self)?;
Ok(())
}
}

impl KmerMinHashBTree {
pub fn new(
scaled: ScaledType,
Expand Down Expand Up @@ -1594,6 +1626,16 @@ impl KmerMinHashBTree {
self.size() as u64
}
}

pub fn from_reader<R>(rdr: R) -> Result<KmerMinHashBTree, Error>
where
R: std::io::Read,
{
let (rdr, _format) = niffler::get_reader(Box::new(rdr))?;

let mh: KmerMinHashBTree = serde_json::from_reader(rdr)?;
Ok(mh)
}
}

impl SigsTrait for KmerMinHashBTree {
Expand Down
11 changes: 7 additions & 4 deletions src/core/src/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use wasm_bindgen::prelude::*;

use crate::cmd::ComputeParameters as _ComputeParameters;
use crate::encodings::HashFunctions;
use crate::prelude::ToWriter;
use crate::signature::Signature as _Signature;
use crate::signature::SigsTrait;
use crate::sketch::minhash::KmerMinHash as _KmerMinHash;
Expand Down Expand Up @@ -66,8 +67,9 @@ impl KmerMinHash {

#[wasm_bindgen]
pub fn to_json(&mut self) -> Result<String, JsErrors> {
let json = serde_json::to_string(&self.0)?;
Ok(json)
let mut st: Vec<u8> = vec![];
self.0.to_writer(&mut st)?;
Ok(unsafe { String::from_utf8_unchecked(st) })
}
}

Expand Down Expand Up @@ -160,8 +162,9 @@ impl Signature {

#[wasm_bindgen]
pub fn to_json(&mut self) -> Result<String, JsErrors> {
let json = serde_json::to_string(&self.0)?;
Ok(json)
let mut st: Vec<u8> = vec![];
self.0.to_writer(&mut st)?;
Ok(unsafe { String::from_utf8_unchecked(st) })
}

pub fn size(&self) -> usize {
Expand Down
Loading

0 comments on commit b69c960

Please sign in to comment.