Skip to content

Commit

Permalink
fix: adding support for "no classifications from unflagged records" re…
Browse files Browse the repository at this point in the history
…view status (#171)
  • Loading branch information
holtgrewe authored Dec 11, 2023
1 parent 7b76770 commit f144e74
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 1 deletion.
4 changes: 3 additions & 1 deletion clinvar_data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ class ReviewStatus(enum.Enum):
- 3 stars: reviewed by expert panel
- 4 stars: reviewed by professional society
In the case that a submission was flagged as duplicate, `FLAGGED_SUBMISSION` is used.
In the case that a submission was flagged as duplicate, ``FLAGGED_SUBMISSION`` is used.
When no unflagged submission is found, ``NO_UNFLAGGED_CLASSIFICATION`` is used.
"""

NO_ASSERTION_PROVIDED = "no assertion provided"
Expand All @@ -110,6 +111,7 @@ class ReviewStatus(enum.Enum):
REVIEWED_BY_EXPERT_PANEL = "reviewed by expert panel"
PRACTICE_GUIDELINE = "practice guideline"
FLAGGED_SUBMISSION = "flagged submission"
NO_UNFLAGGED_CLASSIFICATION = "no classifications from unflagged records"


@enum.unique
Expand Down
249 changes: 249 additions & 0 deletions tests/clinvar_data/data/ex_no_unflagged.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<ReleaseSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Dated="2023-07-30" Type="full" xsi:noNamespaceSchemaLocation="http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_public_1.70.xsd">

<ClinVarSet ID="170238667">
<RecordStatus>current</RecordStatus>
<Title>NM_014780.5(CUL7):c.2592T&gt;G (p.Tyr864Ter) AND 3M syndrome 1</Title>
<ReferenceClinVarAssertion ID="269569" DateLastUpdated="2023-12-09" DateCreated="2014-05-09">
<ClinVarAccession Acc="RCV000115042" Version="3" Type="RCV" DateUpdated="2023-12-09" DateCreated="2014-05-09"/>
<RecordStatus>current</RecordStatus>
<ClinicalSignificance>
<ReviewStatus>no classifications from unflagged records</ReviewStatus>
<Description>no classifications from unflagged records</Description>
</ClinicalSignificance>
<Assertion Type="variation to disease"/>
<ObservedIn>
<Sample>
<Origin>germline</Origin>
<Ethnicity>Arab</Ethnicity>
<Species TaxonomyId="9606">human</Species>
<AffectedStatus>yes</AffectedStatus>
</Sample>
<Method>
<MethodType>research</MethodType>
</Method>
<ObservedData ID="142028773">
<Attribute Type="Description">not provided</Attribute>
</ObservedData>
</ObservedIn>
<MeasureSet Type="Variant" ID="127244" Acc="VCV000127244" Version="1">
<Measure Type="single nucleotide variant" ID="132701">
<Name>
<ElementValue Type="Preferred">NM_014780.5(CUL7):c.2592T&gt;G (p.Tyr864Ter)</ElementValue>
</Name>
<CanonicalSPDI>NC_000006.12:43046303:A:C</CanonicalSPDI>
<AttributeSet>
<Attribute Type="FunctionalConsequence">protein loss of function</Attribute>
<XRef ID="0043" DB="Variation Ontology"/>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NM_001374873" Version="1" Change="c.2592T&gt;G" Type="HGVS, coding, RefSeq">NM_001374873.1:c.2592T&gt;G</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NM_001374874" Version="1" Change="c.2592T&gt;G" Type="HGVS, coding, RefSeq">NM_001374874.1:c.2592T&gt;G</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NM_014780" Version="5" Change="c.2592T&gt;G" Type="HGVS, coding, RefSeq" MANESelect="true">NM_014780.5:c.2592T&gt;G</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NM_001168370" Version="2" Change="c.2688T&gt;G" Type="HGVS, coding, RefSeq">NM_001168370.2:c.2688T&gt;G</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NM_001374872" Version="1" Change="c.2688T&gt;G" Type="HGVS, coding, RefSeq">NM_001374872.1:c.2688T&gt;G</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NG_016205" Version="1" Change="g.12642T&gt;G" Type="HGVS, genomic, RefSeqGene">NG_016205.1:g.12642T&gt;G</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NC_000006" Version="12" Change="g.43046304A&gt;C" Type="HGVS, genomic, top level" integerValue="38">NC_000006.12:g.43046304A&gt;C</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NC_000006" Version="11" Change="g.43014042A&gt;C" Type="HGVS, genomic, top level, previous" integerValue="37">NC_000006.11:g.43014042A&gt;C</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NM_001168370" Version="1" Change="c.2844T&gt;G" Type="HGVS, previous">NM_001168370.1:c.2844T&gt;G</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Type="HGVS, protein">p.Y948*</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NP_001361802" Version="1" Change="p.Tyr864Ter" Type="HGVS, protein, RefSeq">NP_001361802.1:p.Tyr864Ter</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NP_001361803" Version="1" Change="p.Tyr864Ter" Type="HGVS, protein, RefSeq">NP_001361803.1:p.Tyr864Ter</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NP_055595" Version="2" Change="p.Tyr864Ter" Type="HGVS, protein, RefSeq">NP_055595.2:p.Tyr864Ter</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NP_001161842" Version="2" Change="p.Tyr896Ter" Type="HGVS, protein, RefSeq">NP_001161842.2:p.Tyr896Ter</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NP_001361801" Version="1" Change="p.Tyr896Ter" Type="HGVS, protein, RefSeq">NP_001361801.1:p.Tyr896Ter</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Accession="NP_001161842" Version="1" Change="p.Tyr948Ter" Type="HGVS, protein, RefSeq">NP_001161842.1:p.Tyr948Ter</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Type="MolecularConsequence">nonsense</Attribute>
<XRef ID="SO:0001587" DB="Sequence Ontology"/>
<XRef ID="NM_001168370.2:c.2688T&gt;G" DB="RefSeq"/>
</AttributeSet>
<AttributeSet>
<Attribute Type="MolecularConsequence">nonsense</Attribute>
<XRef ID="SO:0001587" DB="Sequence Ontology"/>
<XRef ID="NM_001374872.1:c.2688T&gt;G" DB="RefSeq"/>
</AttributeSet>
<AttributeSet>
<Attribute Type="MolecularConsequence">nonsense</Attribute>
<XRef ID="SO:0001587" DB="Sequence Ontology"/>
<XRef ID="NM_001374873.1:c.2592T&gt;G" DB="RefSeq"/>
</AttributeSet>
<AttributeSet>
<Attribute Type="MolecularConsequence">nonsense</Attribute>
<XRef ID="SO:0001587" DB="Sequence Ontology"/>
<XRef ID="NM_001374874.1:c.2592T&gt;G" DB="RefSeq"/>
</AttributeSet>
<AttributeSet>
<Attribute Type="MolecularConsequence">nonsense</Attribute>
<XRef ID="SO:0001587" DB="Sequence Ontology"/>
<XRef ID="NM_014780.5:c.2592T&gt;G" DB="RefSeq"/>
</AttributeSet>
<AttributeSet>
<Attribute Type="ProteinChange1LetterCode">Y864*</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Type="ProteinChange1LetterCode">Y896*</Attribute>
</AttributeSet>
<AttributeSet>
<Attribute Type="ProteinChange1LetterCode">Y948*</Attribute>
</AttributeSet>
<GlobalMinorAlleleFrequency Value="0.00020" Source="1000 Genomes Project" MinorAllele="G"/>
<CytogeneticLocation>6p21.1</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="6" Accession="NC_000006.12" start="43046304" stop="43046304" display_start="43046304" display_stop="43046304" variantLength="1" positionVCF="43046304" referenceAlleleVCF="A" alternateAlleleVCF="C"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="6" Accession="NC_000006.11" start="43014042" stop="43014042" display_start="43014042" display_stop="43014042" variantLength="1" positionVCF="43014042" referenceAlleleVCF="A" alternateAlleleVCF="C"/>
<MeasureRelationship Type="within single gene">
<Name>
<ElementValue Type="Preferred">cullin 7</ElementValue>
</Name>
<Symbol>
<ElementValue Type="Preferred">CUL7</ElementValue>
</Symbol>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="6" Accession="NC_000006.12" start="43037617" stop="43053851" display_start="43037617" display_stop="43053851" Strand="-"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="6" Accession="NC_000006.11" start="43005354" stop="43021682" display_start="43005354" display_stop="43021682" variantLength="16329" Strand="-"/>
<XRef ID="9820" DB="Gene"/>
<XRef Type="MIM" ID="609577" DB="OMIM"/>
<XRef ID="HGNC:21024" DB="HGNC"/>
</MeasureRelationship>
<XRef Type="rs" ID="201406974" DB="dbSNP"/>
</Measure>
<Name>
<ElementValue Type="Preferred">NM_014780.5(CUL7):c.2592T&gt;G (p.Tyr864Ter)</ElementValue>
</Name>
<Name>
<ElementValue Type="Preferred">NM_014780.5(CUL7):c.2592T&gt;G (p.Tyr864Ter)</ElementValue>
</Name>
<XRef ID="CA236446" DB="ClinGen"/>
</MeasureSet>
<TraitSet Type="Disease" ID="415">
<Trait ID="6044" Type="Disease">
<Name>
<ElementValue Type="Preferred">3M syndrome 1</ElementValue>
<XRef ID="MONDO:0010117" DB="MONDO"/>
</Name>
<Name>
<ElementValue Type="Alternate">Three M syndrome 1</ElementValue>
<XRef ID="Three+M+syndrome/7072" DB="Genetic Alliance"/>
</Name>
<Symbol>
<ElementValue Type="Alternate">3M1</ElementValue>
<XRef Type="MIM" ID="273750" DB="OMIM"/>
</Symbol>
<AttributeSet>
<Attribute Type="public definition">Three M syndrome is characterized by severe pre- and postnatal growth deficiency (final height 5-6 SD below the mean; i.e., 120-130 cm), characteristic facies, and normal intelligence. Additional features of three M syndrome include short broad neck, prominent trapezii, deformed sternum, short thorax, square shoulders, winged scapulae, hyperlordosis, short fifth fingers, prominent heels, and loose joints. Males with three M syndrome have hypogonadism and occasionally hypospadias.</Attribute>
<XRef ID="NBK1481" DB="GeneReviews"/>
</AttributeSet>
<AttributeSet>
<Attribute Type="GARD id" integerValue="15239"/>
<XRef ID="15239" DB="Office of Rare Diseases"/>
</AttributeSet>
<Citation Type="review" Abbrev="GeneReviews">
<ID Source="PubMed">20301654</ID>
<ID Source="BookShelf">NBK1481</ID>
</Citation>
<Citation Type="Translational/Evidence-based" Abbrev="EuroGentest, 2011">
<ID Source="PubMed">21364696</ID>
</Citation>
<XRef ID="MONDO:0010117" DB="MONDO"/>
<XRef ID="C2678312" DB="MedGen"/>
<XRef ID="2616" DB="Orphanet"/>
<XRef Type="MIM" ID="273750" DB="OMIM"/>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>
<ClinVarAssertion ID="269551" SubmissionName="2045">
<ClinVarSubmissionID localKey="NM_001168370.1:c.2844T&gt;G|OMIM:273750" submitter="Developmental Genetics Unit, King Faisal Specialist Hospital &amp; Research Centre" submitterDate="2014-05-08"/>
<ClinVarAccession Acc="SCV000108552" Version="1" Type="SCV" OrgID="500184" OrganizationCategory="laboratory" OrgType="primary" DateUpdated="2014-05-09" DateCreated="2014-05-09"/>
<RecordStatus>current</RecordStatus>
<ClinicalSignificance>
<ReviewStatus>flagged submission</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
<Assertion Type="variation to disease"/>
<ObservedIn>
<Sample>
<Origin>germline</Origin>
<Ethnicity>Arab</Ethnicity>
<GeographicOrigin>Saudi Arabia</GeographicOrigin>
<Species TaxonomyId="9606">human</Species>
<AffectedStatus>yes</AffectedStatus>
<Gender>male</Gender>
</Sample>
<Method>
<MethodType>research</MethodType>
</Method>
<ObservedData>
<Attribute Type="Description">not provided</Attribute>
</ObservedData>
<TraitSet Type="Disease">
<Trait ClinicalFeaturesAffectedStatus="present" Type="Finding">
<Name>
<ElementValue Type="Preferred">Small and mal-aligned teeth, skin and joint laxity, and normal motor and cognitive development</ElementValue>
</Name>
</Trait>
</TraitSet>
</ObservedIn>
<MeasureSet Type="Variant">
<Measure Type="Variation">
<Name>
<ElementValue Type="Alternate">p.Y948*</ElementValue>
</Name>
<AttributeSet>
<Attribute Type="FunctionalConsequence">protein loss of function</Attribute>
<XRef DB="Variation Ontology" ID="0043" URL="http://www.variationontology.org/cgi-bin/amivario/term-details.cgi?term=VariO:0043"/>
</AttributeSet>
<AttributeSet>
<Attribute Type="HGVS">NM_001168370.1:c.2844T&gt;G</Attribute>
</AttributeSet>
<MeasureRelationship Type="variant in gene">
<Symbol>
<ElementValue Type="Preferred">CUL7</ElementValue>
</Symbol>
</MeasureRelationship>
</Measure>
</MeasureSet>
<TraitSet Type="Disease">
<Trait Type="Disease">
<Name>
<ElementValue Type="Preferred">Three M syndrome 1</ElementValue>
</Name>
<XRef DB="OMIM" ID="273750" Type="MIM"/>
</Trait>
</TraitSet>
<StudyName>Genomic Analysis of Primordial Dwarfism Reveals Novel Disease Genes</StudyName>
<Comment DataSource="NCBI" Type="FlaggedComment">Reason: This record appears to be redundant with a more recent record from the same submitter.</Comment>
<Comment DataSource="NCBI" Type="FlaggedComment">Notes: SCV000108552 appears to be redundant with SCV000221722.</Comment>
</ClinVarAssertion>
</ClinVarSet>

</ReleaseSet>
Loading

0 comments on commit f144e74

Please sign in to comment.