Skip to content

Commit

Permalink
feat: Adding Clinvar Accession to TSV input (#146)
Browse files Browse the repository at this point in the history
  • Loading branch information
xiamaz authored Oct 5, 2023
1 parent ee1fc5d commit 1ef8b8f
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 13 deletions.
4 changes: 3 additions & 1 deletion clinvar_this/batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,10 @@ def merge_submission(
base: models.SubmissionClinvarSubmission,
patch: models.SubmissionClinvarSubmission,
) -> models.SubmissionClinvarSubmission:
clinvar_accession = base.clinvar_accession or patch.clinvar_accession
return evolve(
base,
clinvar_accession=clinvar_accession,
condition_set=patch.condition_set,
clinical_significance=patch.clinical_significance,
observed_in=patch.observed_in,
Expand Down Expand Up @@ -248,7 +250,7 @@ def _retrieve_store_response(
submissions = summary_response.submissions or []
for submission in submissions:
local_key_to_accession[
- submission.identifiers.local_key
+ submission.identifiers.local_key or submission.identifiers.clinvar_local_key
] = submission.identifiers.clinvar_accession
errors = [
error_inner.user_message
Expand Down
26 changes: 22 additions & 4 deletions clinvar_this/io/tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ class SeqVarTsvRecord:
clinical_significance_comment: typing.Optional[str] = None
#: HPO terms for clinical features
hpo_terms: typing.Optional[typing.List[str]] = None
#: Existing ClinVar SCV accession
accession: typing.Optional[str] = None


@attrs.frozen
Expand Down Expand Up @@ -106,6 +108,8 @@ class StrucVarTsvRecord:
clinical_significance_comment: typing.Optional[str] = None
#: HPO terms for clinical features
hpo_terms: typing.Optional[typing.List[str]] = None
#: Existing ClinVar SCV accession
accession: typing.Optional[str] = None


@attrs.frozen
Expand Down Expand Up @@ -275,6 +279,13 @@ def _join_list(xs: typing.List[typing.Any]) -> str:
converter=_str_list,
extractor=lambda r: _join_list(r.hpo_terms or []),
),
SeqVarHeaderColumn(
header_names=("ACCESSION",),
key="accession",
required=False,
converter=str,
extractor=lambda r: str(r.accession or ""),
),
)

#: The header columns for structural variant TSV files.
Expand Down Expand Up @@ -363,6 +374,13 @@ def _join_list(xs: typing.List[typing.Any]) -> str:
converter=_str_list,
extractor=lambda r: _join_list(r.omim),
),
StrucVarHeaderColumn(
header_names=("ACCESSION",),
key="accession",
required=False,
converter=str,
extractor=lambda r: str(r.accession or ""),
),
)


Expand Down Expand Up @@ -695,6 +713,7 @@ def record_clinical_features(
clinvar_submission_release_status=release_status,
clinvar_submission=[
SubmissionClinvarSubmission(
clinvar_accession=record.accession,
local_id=str(_uuid4_if_falsy()),
local_key=record.local_key,
condition_set=SubmissionConditionSet(condition=[record_condition(record)]),
Expand Down Expand Up @@ -778,6 +797,7 @@ def record_clinical_features(
clinvar_submission_release_status=release_status,
clinvar_submission=[
SubmissionClinvarSubmission(
clinvar_accession=record.accession,
local_id=str(_uuid4_if_falsy()),
local_key=record.local_key,
condition_set=SubmissionConditionSet(condition=[record_condition(record)]),
Expand Down Expand Up @@ -883,12 +903,11 @@ def submission_to_seq_var_tsv_record(
)

extra_data = {}
- if submission.clinvar_accession:
- extra_data["clinvar_accession"] = submission.clinvar_accession # XXX
if submission.extra_data:
extra_data.update(submission.extra_data)

return SeqVarTsvRecord(
accession=submission.clinvar_accession,
assembly=chromosome_coordinates.assembly,
chromosome=chromosome_coordinates.chromosome,
pos=chromosome_coordinates.start,
Expand Down Expand Up @@ -980,12 +999,11 @@ def submission_to_struc_var_tsv_record(
)

extra_data = {}
- if submission.clinvar_accession:
- extra_data["clinvar_accession"] = submission.clinvar_accession # XXX
if submission.extra_data:
extra_data.update(submission.extra_data)

return StrucVarTsvRecord(
accession=submission.clinvar_accession,
assembly=chromosome_coordinates.assembly,
chromosome=chromosome_coordinates.chromosome,
start=chromosome_coordinates.start,
Expand Down
4 changes: 2 additions & 2 deletions tests/clinvar_this/data/io_tsv/example.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
- ASSEMBLY CHROM POS REF ALT OMIM MOI CLIN_SIG KEY gene
- GRCh37 10 115614632 A G OMIM:618278 Autosomal recessive inheritance not provided KEY NHLRC2
+ ASSEMBLY CHROM POS REF ALT OMIM MOI CLIN_SIG KEY gene ACCESSION
+ GRCh37 10 115614632 A G OMIM:618278 Autosomal recessive inheritance not provided KEY NHLRC2 SCV1
4 changes: 2 additions & 2 deletions tests/clinvar_this/data/io_tsv/example_sv.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
- ASSEMBLY CHROM START STOP SV_TYPE OMIM MOI CLIN_SIG HPO
- GRCh38 1 844347 4398122 Deletion Autosomal dominant inheritance not provided HP:0001263
+ ASSEMBLY CHROM START STOP SV_TYPE OMIM MOI CLIN_SIG HPO ACCESSION
+ GRCh38 1 844347 4398122 Deletion Autosomal dominant inheritance not provided HP:0001263 SCV1
8 changes: 4 additions & 4 deletions tests/clinvar_this/test_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,10 @@ def test_export_seq_variant_tsv(fs, app_config, exists, force):
if not exists or force:
expected = "\n".join(
[
- "ASSEMBLY\tCHROM\tPOS\tREF\tALT\tOMIM\tMOI\tCLIN_SIG\tCLIN_EVAL\tCLIN_COMMENT\tKEY\tHPO",
+ "ASSEMBLY\tCHROM\tPOS\tREF\tALT\tOMIM\tMOI\tCLIN_SIG\tCLIN_EVAL\tCLIN_COMMENT\tKEY\tHPO\tACCESSION",
(
"GRCh37\t19\t48183936\tC\tCA\t619325\tAutosomal dominant inheritance\t"
- "Likely pathogenic\t\t\t\tHP:0004322,HP:0001263\n"
+ "Likely pathogenic\t\t\t\tHP:0004322,HP:0001263\t\n"
),
]
)
Expand Down Expand Up @@ -319,11 +319,11 @@ def test_export_structural_variant_tsv(fs, app_config, exists, force):
[
(
"ASSEMBLY\tCHROM\tSTART\tSTOP\tSV_TYPE\tOMIM\tMOI\tCLIN_SIG\tCLIN_EVAL\t"
- "CLIN_COMMENT\tKEY\tHPO"
+ "CLIN_COMMENT\tKEY\tHPO\tACCESSION"
),
(
"GRCh38\t1\t844347\t4398122\tDeletion\t\tAutosomal dominant inheritance\t"
- "not provided\t\t\t\t\n"
+ "not provided\t\t\t\t\t\n"
),
]
)
Expand Down
4 changes: 4 additions & 0 deletions tests/clinvar_this/test_io_tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def test_read_seq_var_tsv_path():
actual = read_seq_var_tsv(path=DATA_DIR / "example.tsv")
assert actual == [
SeqVarTsvRecord(
accession="SCV1",
assembly=Assembly.GRCH37,
chromosome=Chromosome.CHR10,
pos=115614632,
Expand All @@ -44,6 +45,7 @@ def test_read_struc_var_tsv_path():
actual = read_struc_var_tsv(path=DATA_DIR / "example_sv.tsv")
assert actual == [
StrucVarTsvRecord(
accession="SCV1",
assembly=Assembly.GRCH38,
chromosome=Chromosome.CHR1,
start=844347,
Expand All @@ -62,6 +64,7 @@ def test_read_seq_var_tsv_file():
actual = read_seq_var_tsv(file=inputf)
assert actual == [
SeqVarTsvRecord(
accession="SCV1",
assembly=Assembly.GRCH37,
chromosome=Chromosome.CHR10,
pos=115614632,
Expand All @@ -81,6 +84,7 @@ def test_read_struc_var_tsv_file():
actual = read_struc_var_tsv(file=inputf)
assert actual == [
StrucVarTsvRecord(
accession="SCV1",
assembly=Assembly.GRCH38,
chromosome=Chromosome.CHR1,
start=844347,
Expand Down

0 comments on commit 1ef8b8f

Please sign in to comment.