Skip to content

Commit

Permalink
feat: adding 50bp filter for REF/ALT on clinvar-sv import (#293) (#294)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Nov 16, 2023
1 parent 1837899 commit 831830a
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 29 deletions.
21 changes: 21 additions & 0 deletions src/clinvar_sv/cli/import.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ pub struct Args {
#[arg(long)]
pub path_out_rocksdb: String,

/// Minimal VCF REF/ALT length to consider as SV.
#[arg(long, default_value_t = 50)]
pub min_var_size: u32,
/// Name of the column family to import into.
#[arg(long, default_value = "clinvar-sv")]
pub cf_name: String,
Expand Down Expand Up @@ -89,6 +92,23 @@ fn jsonl_import(
outer_stop,
} = sequence_location;

// Sequence-resolved records carry explicit VCF REF/ALT alleles.  Skip a
// record only when BOTH alleles are shorter than `args.min_var_size`; one
// allele reaching the threshold is enough to keep it.  Records without
// both VCF alleles are never filtered here.
if let (Some(ref_allele), Some(alt_allele)) =
    (reference_allele_vcf.as_ref(), alternate_allele_vcf.as_ref())
{
    // `u32` -> `usize` so the threshold can be compared with `len()`.
    let min_len = args.min_var_size as usize;
    if ref_allele.len() < min_len && alt_allele.len() < min_len {
        tracing::debug!(
            "skipping line because of short REF/ALT: {}/{}: {}>{}",
            &vcv,
            &rcv,
            ref_allele,
            alt_allele,
        );
        continue;
    }
}

let (start, stop, inner_start, inner_stop, outer_start, outer_stop) =
if let (Some(start), Some(stop)) = (start, stop) {
(
Expand Down Expand Up @@ -267,6 +287,7 @@ mod test {
String::from("tests/clinvar-sv/clinvar-variants-grch37-strucvars.jsonl"),
],
path_out_rocksdb: format!("{}", tmp_dir.join("out-rocksdb").display()),
min_var_size: 50,
cf_name: String::from("clinvar-sv"),
cf_name_by_rcv: String::from("clinvar-sv-by-rcv"),
path_wal_dir: None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,6 @@
source: src/clinvar_sv/cli/query.rs
expression: "&out_data"
---
{"release":"GRCh37","chromosome":"22","start":41320486,"stop":41320486,"reference":"G","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000051","reference_assertions":[{"rcv":"RCV000000068","title":"NM_022098.4(XPNPEP3):c.1357G>T (p.Gly453Cys) AND Nephronophthisis-like nephropathy 1","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"22","start":41305199,"stop":41305202,"reference":"TCAAA","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":0,"vcv":"VCV000000052","reference_assertions":[{"rcv":"RCV000000069","title":"NM_022098.4(XPNPEP3):c.931_934del (p.Asn311fs) AND Nephronophthisis-like nephropathy 1","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"15","start":49048486,"stop":49048486,"reference":"G","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000056","reference_assertions":[{"rcv":"RCV000000073","title":"NM_001194998.2(CEP152):c.2959C>T (p.Arg987Ter) AND Microcephaly 9, primary, autosomal recessive","clinical_significance":1,"review_status":3}]}
{"release":"GRCh37","chromosome":"19","start":45315576,"stop":45315576,"reference":"C","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000443","reference_assertions":[{"rcv":"RCV000000472","title":"NM_005581.5(BCAM):c.361C>T (p.Arg121Ter) AND BLOOD GROUP--LUTHERAN NULL","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98304466,"stop":98304466,"reference":"G","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000451","reference_assertions":[{"rcv":"RCV000000480","title":"NM_000097.7(CPOX):c.991C>T (p.Arg331Trp) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98311840,"stop":98311860,"reference":"TACCTGTGCCAGAGCCTGGCAC","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":0,"vcv":"VCV000000455","reference_assertions":[{"rcv":"RCV000000484","title":"NM_000097.7(CPOX):c.489_509del (p.Cys164_Val170del) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98307627,"stop":98307627,"reference":"G","alternative":"C","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000456","reference_assertions":[{"rcv":"RCV000000485","title":"NM_000097.7(CPOX):c.883C>G (p.His295Asp) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98300248,"stop":98300248,"reference":"T","alternative":"C","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000457","reference_assertions":[{"rcv":"RCV000000486","title":"NM_000097.7(CPOX):c.1277+3A>G AND Harderoporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98309933,"stop":98309933,"reference":"G","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000460","reference_assertions":[{"rcv":"RCV000000489","title":"NM_000097.7(CPOX):c.623C>T (p.Ser208Phe) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98304475,"stop":98304475,"reference":"G","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000461","reference_assertions":[{"rcv":"RCV000000490","title":"NM_000097.7(CPOX):c.982C>T (p.Arg328Cys) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98307653,"stop":98307654,"reference":"G","alternative":"GT","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":1,"vcv":"VCV000000462","reference_assertions":[{"rcv":"RCV000000491","title":"NM_000097.7(CPOX):c.856dup (p.Thr286fs) AND Coproporphyria","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"3","start":98307675,"stop":98307675,"reference":"C","alternative":"G","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000463","reference_assertions":[{"rcv":"RCV000000492","title":"NM_000097.7(CPOX):c.835G>C (p.Gly279Arg) AND Coproporphyria, digenic","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"11","start":112104201,"stop":112104214,"reference":"AGTTCTTCCTGTAGG","alternative":"A","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":0,"vcv":"VCV000000478","reference_assertions":[{"rcv":"RCV000000507","title":"NM_000317.3(PTS):c.361_374del (p.Val121fs) AND Hyperphenylalaninemia, bh4-deficient, a, due to partial pts deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"11","start":112099372,"stop":112099372,"reference":"A","alternative":"G","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000482","reference_assertions":[{"rcv":"RCV000000511","title":"NM_000317.3(PTS):c.139A>G (p.Asn47Asp) AND Hyperphenylalaninemia, bh4-deficient, a, due to partial pts deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17503409,"stop":17503410,"reference":"G","alternative":"GGTA","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":1,"vcv":"VCV000000489","reference_assertions":[{"rcv":"RCV000000518","title":"NM_000320.3(QDPR):c.366_368dup (p.Thr123dup) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17503456,"stop":17503456,"reference":"A","alternative":"C","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000491","reference_assertions":[{"rcv":"RCV000000520","title":"NM_000320.3(QDPR):c.322T>G (p.Trp108Gly) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17510986,"stop":17510986,"reference":"A","alternative":"G","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000492","reference_assertions":[{"rcv":"RCV000000521","title":"NM_000320.3(QDPR):c.106T>C (p.Trp36Arg) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17493951,"stop":17493951,"reference":"T","alternative":"C","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000494","reference_assertions":[{"rcv":"RCV000000523","title":"NM_000320.3(QDPR):c.449A>G (p.Tyr150Cys) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"4","start":17506027,"stop":17506027,"reference":"C","alternative":"T","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000495","reference_assertions":[{"rcv":"RCV000000524","title":"NM_000320.3(QDPR):c.270G>A (p.Trp90Ter) AND Dihydropteridine reductase deficiency","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"6","start":117198947,"stop":117198947,"reference":"A","alternative":"G","inner_start":null,"inner_stop":null,"outer_start":null,"outer_stop":null,"variant_type":5,"vcv":"VCV000000497","reference_assertions":[{"rcv":"RCV000000526","title":"NM_173560.4(RFX6):c.224-12A>G AND Hypoplastic pancreas-intestinal atresia-hypoplastic gallbalder syndrome","clinical_significance":0,"review_status":5}]}
{"release":"GRCh37","chromosome":"X","start":155210040,"stop":155242832,"reference":null,"alternative":null,"inner_start":155210040,"inner_stop":null,"outer_start":155242832,"outer_stop":null,"variant_type":0,"vcv":"VCV000057500","reference_assertions":[{"rcv":"RCV000051210","title":"GRCh38/hg38 Xq28(chrX:155980375-156013167)x0 AND See cases","clinical_significance":4,"review_status":3}]}
{"release":"GRCh37","chromosome":"7","start":64691936,"stop":64866073,"reference":null,"alternative":null,"inner_start":64691936,"inner_stop":null,"outer_start":64866073,"outer_stop":null,"variant_type":0,"vcv":"VCV000057566","reference_assertions":[{"rcv":"RCV000051294","title":"GRCh38/hg38 7q11.21(chr7:65231558-65401160)x1 AND See cases","clinical_significance":2,"review_status":3}]}
{"release":"GRCh37","chromosome":"22","start":34150132,"stop":34182300,"reference":null,"alternative":null,"inner_start":34150132,"inner_stop":null,"outer_start":34182300,"outer_stop":null,"variant_type":0,"vcv":"VCV000057627","reference_assertions":[{"rcv":"RCV000051361","title":"GRCh38/hg38 22q12.3(chr22:33754145-33786313)x1 AND See cases","clinical_significance":0,"review_status":3}]}
Expand Down
2 changes: 1 addition & 1 deletion tests/clinvar-sv/clinvar-sv-grch37.tsv.db/000016.sst
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/clinvar-sv/clinvar-sv-grch37.tsv.db/000018.sst
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/clinvar-sv/clinvar-sv-grch37.tsv.db/000020.sst
Git LFS file not shown
2 changes: 1 addition & 1 deletion tests/clinvar-sv/clinvar-sv-grch37.tsv.db/IDENTITY
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/clinvar-sv/clinvar-sv-grch37.tsv.db/LOG
Git LFS file not shown
2 changes: 1 addition & 1 deletion tests/clinvar-sv/clinvar-sv-grch37.tsv.db/MANIFEST-000005
Git LFS file not shown

0 comments on commit 831830a

Please sign in to comment.