Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
xiamaz committed Oct 2, 2023
1 parent 2f68a15 commit 79f376a
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 4 deletions.
14 changes: 14 additions & 0 deletions clinvar_this/io/tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ class StrucVarTsvRecord:
clinical_significance_comment: typing.Optional[str] = None
#: HPO terms for clinical features
hpo_terms: typing.Optional[typing.List[str]] = None
#: PubMed IDs (PMIDs) of literature references
pmids: typing.Optional[typing.List[str]] = None


@attrs.frozen
Expand Down Expand Up @@ -373,6 +375,13 @@ def _join_list(xs: typing.List[typing.Any]) -> str:
converter=_str_list,
extractor=lambda r: _join_list(r.omim),
),
StrucVarHeaderColumn(
    # Optional ``PMID`` column. The key must be ``pmids`` -- the record field
    # the extractor below reads -- and not ``hpo_terms``, which already
    # belongs to the ``HPO`` column.
    header_names=("PMID",),
    key="pmids",
    required=False,
    converter=_str_list,
    # ``pmids`` may be ``None`` on the record; export it as an empty cell.
    extractor=lambda r: _join_list(r.pmids or []),
),
)


Expand Down Expand Up @@ -965,6 +974,10 @@ def _inheritance(submission: SubmissionClinvarSubmission) -> typing.Optional[Mod
else:
return None

def _pmids(submission: SubmissionClinvarSubmission) -> typing.Optional[typing.List[str]]:
if citations := submission.clinical_significance.citation:
return [c.id for c in citations if c.db == CitationDb.PUBMED]

def submission_to_struc_var_tsv_record(
submission: SubmissionClinvarSubmission,
) -> typing.Optional[StrucVarTsvRecord]:
Expand Down Expand Up @@ -1020,6 +1033,7 @@ def submission_to_struc_var_tsv_record(
stop=chromosome_coordinates.stop,
sv_type=variant_type,
omim=_condition(submission),
pmids=_pmids(submission),
inheritance=_inheritance(submission),
clinical_significance_description=submission.clinical_significance.clinical_significance_description,
local_key=submission.local_key or "",
Expand Down
6 changes: 6 additions & 0 deletions docs/file_formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ The following headers are optional:
Filled automatically with a UUID if missing, recommended to leave empty.
- ``HPO`` - List of HPO terms separated by comma or semicolon, any space will be stripped.
E.g., ``HP:0004322; HP:0001263``.
- ``PMID`` - List of PubMed IDs separated by comma or semicolon, any space
will be stripped.
E.g., ``31859447,29474920``.
- ``$remove_from_batch`` - you can use this for removing a previously added variant from the given batch; one of ``true`` and ``false``, defaults to ``false``.

Any further header will be imported into the local repository into an ``extra_data`` field.
Expand Down Expand Up @@ -88,6 +91,9 @@ The following headers are optional:
Filled automatically with a UUID if missing, recommended to leave empty.
- ``HPO`` - List of HPO terms separated by comma or semicolon, any space will be stripped.
E.g., ``HP:0004322; HP:0001263``.
- ``PMID`` - List of PubMed IDs separated by comma or semicolon, any space
will be stripped.
E.g., ``31859447,29474920``.
- ``$remove_from_batch`` - you can use this for removing a previously added variant from the given batch; one of ``true`` and ``false``, defaults to ``false``.

Any further header will be imported into the local repository into an ``extra_data`` field.
Expand Down
2 changes: 2 additions & 0 deletions tests/clinvar_this/data/io_tsv/example_citation.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ASSEMBLY CHROM POS REF ALT OMIM MOI CLIN_SIG KEY gene PMID
GRCh37 10 115614632 A G OMIM:618278 Autosomal recessive inheritance not provided KEY NHLRC2 123456,000001
8 changes: 4 additions & 4 deletions tests/clinvar_this/test_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,10 @@ def test_export_seq_variant_tsv(fs, app_config, exists, force):
if not exists or force:
expected = "\n".join(
[
"ASSEMBLY\tCHROM\tPOS\tREF\tALT\tOMIM\tMOI\tCLIN_SIG\tCLIN_EVAL\tCLIN_COMMENT\tKEY\tHPO",
"ASSEMBLY\tCHROM\tPOS\tREF\tALT\tOMIM\tMOI\tCLIN_SIG\tCLIN_EVAL\tCLIN_COMMENT\tKEY\tHPO\tPMID",
(
"GRCh37\t19\t48183936\tC\tCA\t619325\tAutosomal dominant inheritance\t"
"Likely pathogenic\t\t\t\tHP:0004322,HP:0001263\n"
"Likely pathogenic\t\t\t\tHP:0004322,HP:0001263\t\n"
),
]
)
Expand Down Expand Up @@ -319,11 +319,11 @@ def test_export_structural_variant_tsv(fs, app_config, exists, force):
[
(
"ASSEMBLY\tCHROM\tSTART\tSTOP\tSV_TYPE\tOMIM\tMOI\tCLIN_SIG\tCLIN_EVAL\t"
"CLIN_COMMENT\tKEY\tHPO"
"CLIN_COMMENT\tKEY\tHPO\tPMID"
),
(
"GRCh38\t1\t844347\t4398122\tDeletion\t\tAutosomal dominant inheritance\t"
"not provided\t\t\t\t\n"
"not provided\t\t\t\t\t\n"
),
]
)
Expand Down
20 changes: 20 additions & 0 deletions tests/clinvar_this/test_io_tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,26 @@ def test_read_seq_var_tsv_file():
]


def test_read_seq_var_tsv_file_citation():
    """Reading ``example_citation.tsv`` yields one record carrying its PMIDs.

    The fixture has a ``PMID`` column holding two comma-separated IDs and an
    unrecognised ``gene`` column, which is expected to end up in ``extra_data``.
    """
    expected = [
        SeqVarTsvRecord(
            assembly=Assembly.GRCH37,
            chromosome=Chromosome.CHR10,
            pos=115614632,
            ref="A",
            alt="G",
            omim=["OMIM:618278"],
            inheritance=ModeOfInheritance.AUTOSOMAL_RECESSIVE_INHERITANCE,
            clinical_significance_description=ClinicalSignificanceDescription.NOT_PROVIDED,
            local_key="KEY",
            pmids=["123456", "000001"],
            extra_data={"gene": "NHLRC2"},
        )
    ]
    citation_path = DATA_DIR / "example_citation.tsv"
    with citation_path.open("rt") as handle:
        records = read_seq_var_tsv(file=handle)
    assert records == expected


def test_read_struc_var_tsv_file():
with (DATA_DIR / "example_sv.tsv").open("rt") as inputf:
actual = read_struc_var_tsv(file=inputf)
Expand Down

0 comments on commit 79f376a

Please sign in to comment.