Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding 50bp filter for REF/ALT on clinvar-sv import (#293) #294

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/clinvar_sv/cli/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
#[arg(long)]
pub path_out_rocksdb: String,

/// Minimum VCF REF/ALT length to consider a variant an SV; records whose REF and ALT are both shorter are skipped.
#[arg(long, default_value_t = 50)]
pub min_var_size: u32,
/// Name of the column family to import into.
#[arg(long, default_value = "clinvar-sv")]
pub cf_name: String,
Expand All @@ -44,10 +47,10 @@
// Open reader, possibly decompressing gzipped files.
let reader: Box<dyn std::io::Read> = if path_in_jsonl.ends_with(".gz") {
Box::new(flate2::read::GzDecoder::new(std::fs::File::open(
&path_in_jsonl,

Check warning on line 50 in src/clinvar_sv/cli/import.rs

View workflow job for this annotation

GitHub Actions / clippy

the borrowed expression implements the required traits

warning: the borrowed expression implements the required traits --> src/clinvar_sv/cli/import.rs:50:13 | 50 | &path_in_jsonl, | ^^^^^^^^^^^^^^ help: change this to: `path_in_jsonl` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_borrow = note: `#[warn(clippy::needless_borrow)]` on by default
)?))
} else {
Box::new(std::fs::File::open(&path_in_jsonl)?)

Check warning on line 53 in src/clinvar_sv/cli/import.rs

View workflow job for this annotation

GitHub Actions / clippy

the borrowed expression implements the required traits

warning: the borrowed expression implements the required traits --> src/clinvar_sv/cli/import.rs:53:38 | 53 | Box::new(std::fs::File::open(&path_in_jsonl)?) | ^^^^^^^^^^^^^^ help: change this to: `path_in_jsonl` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#needless_borrow
};

let reader = std::io::BufReader::new(reader);
Expand Down Expand Up @@ -89,6 +92,23 @@
outer_stop,
} = sequence_location;

if let (Some(reference_allele_vcf), Some(alternate_allee_vcf)) =
(reference_allele_vcf.as_ref(), alternate_allele_vcf.as_ref())
{
if reference_allele_vcf.len() < args.min_var_size as usize
&& alternate_allee_vcf.len() < args.min_var_size as usize
{
tracing::debug!(
"skipping line because of short REF/ALT: {}/{}: {}>{}",
&vcv,
&rcv,

Check warning on line 104 in src/clinvar_sv/cli/import.rs

View check run for this annotation

Codecov / codecov/patch

src/clinvar_sv/cli/import.rs#L103-L104

Added lines #L103 - L104 were not covered by tests
reference_allele_vcf,
alternate_allee_vcf,
);
continue;
}
}

let (start, stop, inner_start, inner_stop, outer_start, outer_stop) =
if let (Some(start), Some(stop)) = (start, stop) {
(
Expand Down Expand Up @@ -267,6 +287,7 @@
String::from("tests/clinvar-sv/clinvar-variants-grch37-strucvars.jsonl"),
],
path_out_rocksdb: format!("{}", tmp_dir.join("out-rocksdb").display()),
min_var_size: 50,
cf_name: String::from("clinvar-sv"),
cf_name_by_rcv: String::from("clinvar-sv-by-rcv"),
path_wal_dir: None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,6 @@
source: src/clinvar_sv/cli/query.rs
expression: "&out_data"
---
{"release":"GRCh37","chromosome":"22","start":41320486,"stop":41320486,"reference":"G","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000051","reference_assertions":[{"rcv":"RCV000000068","title":"NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys) AND Nephronophthisis-like nephropathy 1","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"22","start":41305199,"stop":41305202,"reference":"TCAAA","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":0,"vcv":"VCV000000052","reference_assertions":[{"rcv":"RCV000000069","title":"NM_022098.4(XPNPEP3):c.931_934del (p.Asn311fs) AND Nephronophthisis-like nephropathy 1","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"15","start":49048486,"stop":49048486,"reference":"G","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000056","reference_assertions":[{"rcv":"RCV000000073","title":"NM_001194998.2(CEP152):c.2959C>T (p.Arg987Ter) AND Microcephaly 9, primary, autosomal recessive","clinical_significance":1,"review_status":3}]}
{"release":"GRCh37","chromosome":"19","start":45315576,"stop":45315576,"reference":"C","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000443","reference_assertions":[{"rcv":"RCV000000472","title":"NM_005581.5(BCAM):c.361C>T (p.Arg121Ter) AND BLOOD GROUP--LUTHERAN NULL","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98304466,"stop":98304466,"reference":"G","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000451","reference_assertions":[{"rcv":"RCV000000480","title":"NM_000097.7(CPOX):c.991C>T (p.Arg331Trp) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98311840,"stop":98311860,"reference":"TACCTGTGCCAGAGCCTGGCAC","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":0,"vcv":"VCV000000455","reference_assertions":[{"rcv":"RCV000000484","title":"NM_000097.7(CPOX):c.489_509del (p.Cys164_Val170del) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98307627,"stop":98307627,"reference":"G","alternative":"C","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000456","reference_assertions":[{"rcv":"RCV000000485","title":"NM_000097.7(CPOX):c.883C>G (p.His295Asp) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98300248,"stop":98300248,"reference":"T","alternative":"C","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000457","reference_assertions":[{"rcv":"RCV000000486","title":"NM_000097.7(CPOX):c.1277+3A>G AND Harderoporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98309933,"stop":98309933,"reference":"G","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000460","reference_assertions":[{"rcv":"RCV000000489","title":"NM_000097.7(CPOX):c.623C>T (p.Ser208Phe) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98304475,"stop":98304475,"reference":"G","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000461","reference_assertions":[{"rcv":"RCV000000490","title":"NM_000097.7(CPOX):c.982C>T (p.Arg328Cys) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98307653,"stop":98307654,"reference":"G","alternative":"GT","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":1,"vcv":"VCV000000462","reference_assertions":[{"rcv":"RCV000000491","title":"NM_000097.7(CPOX):c.856dup (p.Thr286fs) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98307675,"stop":98307675,"reference":"C","alternative":"G","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000463","reference_assertions":[{"rcv":"RCV000000492","title":"NM_000097.7(CPOX):c.835G>C (p.Gly279Arg) AND Coproporphyria, digenic","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"11","start":112104201,"stop":112104214,"reference":"AGTTCTTCCTGTAGG","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":0,"vcv":"VCV000000478","reference_assertions":[{"rcv":"RCV000000507","title":"NM_000317.3(PTS):c.361_374del (p.Val121fs) AND Hyperphenylalaninemia, bh4-deficient, a, due to partial pts deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"11","start":112099372,"stop":112099372,"reference":"A","alternative":"G","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000482","reference_assertions":[{"rcv":"RCV000000511","title":"NM_000317.3(PTS):c.139A>G (p.Asn47Asp) AND Hyperphenylalaninemia, bh4-deficient, a, due to partial pts deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17503409,"stop":17503410,"reference":"G","alternative":"GGTA","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":1,"vcv":"VCV000000489","reference_assertions":[{"rcv":"RCV000000518","title":"NM_000320.3(QDPR):c.366_368dup (p.Thr123dup) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17503456,"stop":17503456,"reference":"A","alternative":"C","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000491","reference_assertions":[{"rcv":"RCV000000520","title":"NM_000320.3(QDPR):c.322T>G (p.Trp108Gly) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17510986,"stop":17510986,"reference":"A","alternative":"G","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000492","reference_assertions":[{"rcv":"RCV000000521","title":"NM_000320.3(QDPR):c.106T>C (p.Trp36Arg) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17493951,"stop":17493951,"reference":"T","alternative":"C","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000494","reference_assertions":[{"rcv":"RCV000000523","title":"NM_000320.3(QDPR):c.449A>G (p.Tyr150Cys) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17506027,"stop":17506027,"reference":"C","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000495","reference_assertions":[{"rcv":"RCV000000524","title":"NM_000320.3(QDPR):c.270G>A (p.Trp90Ter) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"6","start":117198947,"stop":117198947,"reference":"A","alternative":"G","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000497","reference_assertions":[{"rcv":"RCV000000526","title":"NM_173560.4(RFX6):c.224-12A>G AND Hypoplastic pancreas-intestinal atresia-hypoplastic gallbalder syndrome","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"X","start":155210040,"stop":155242832,"reference":null,"alternative":null,"inner_start":155210040,"inner_stop":null,"outer_start":155242832,"outer_stop":null,"variant_type":0,"vcv":"VCV000057500","reference_assertions":[{"rcv":"RCV000051210","title":"GRCh38/hg38 Xq28(chrX:155980375-156013167)x0 AND See cases","clinical_significance":4,"review_status":3}]}
{"release":"GRCh37","chromosome":"7","start":64691936,"stop":64866073,"reference":null,"alternative":null,"inner_start":64691936,"inner_stop":null,"outer_start":64866073,"outer_stop":null,"variant_type":0,"vcv":"VCV000057566","reference_assertions":[{"rcv":"RCV000051294","title":"GRCh38/hg38 7q11.21(chr7:65231558-65401160)x1 AND See cases","clinical_significance":2,"review_status":3}]}
{"release":"GRCh37","chromosome":"22","start":34150132,"stop":34182300,"reference":null,"alternative":null,"inner_start":34150132,"inner_stop":null,"outer_start":34182300,"outer_stop":null,"variant_type":0,"vcv":"VCV000057627","reference_assertions":[{"rcv":"RCV000051361","title":"GRCh38/hg38 22q12.3(chr22:33754145-33786313)x1 AND See cases","clinical_significance":0,"review_status":3}]}
Expand Down
2 changes: 1 addition & 1 deletion tests/clinvar-sv/clinvar-sv-grch37.tsv.db/000016.sst
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/clinvar-sv/clinvar-sv-grch37.tsv.db/000018.sst
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/clinvar-sv/clinvar-sv-grch37.tsv.db/000020.sst
Git LFS file not shown
2 changes: 1 addition & 1 deletion tests/clinvar-sv/clinvar-sv-grch37.tsv.db/IDENTITY
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/clinvar-sv/clinvar-sv-grch37.tsv.db/LOG
Git LFS file not shown
2 changes: 1 addition & 1 deletion tests/clinvar-sv/clinvar-sv-grch37.tsv.db/MANIFEST-000005
Git LFS file not shown
Loading