Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Now non-locatable data sources can create funcotations again. #5774

Merged
merged 2 commits into from
Mar 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@ public String getVersion() {
return version;
}

/**
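* @return {@code true} if this {@link DataSourceFuncotationFactory} relies on the feature matching provided by the GATK engine (features of any type) to create {@link Funcotation}s; {@code false} otherwise (for example, data sources that are matched by gene name).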
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any features? Or specific types? Please update docs.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - any features. If the DataSourceFuncotationFactory relies on feature matching that the GATK engine provides, then this should be true.

Things that don't require features (like gene name matching data sources) don't require any features to be present to create a funcotation.

*/
@VisibleForTesting
public boolean requiresFeatures() {
    // Default: this factory relies on features being supplied to it in order to annotate.
    // Factories that key on something else (e.g. a gene name) override this to return false.
    // NOTE(review): createFuncotations() and isFeatureListCompatible() call this method in
    // non-test code, so the @VisibleForTesting marker may be misleading -- confirm it should stay.
    return true;
}

/**
* @return An ordered {@link LinkedHashSet} of the names of annotations that this Data Source supports.
*/
Expand Down Expand Up @@ -169,11 +175,15 @@ public List<Funcotation> createFuncotations(final VariantContext variant, final
Utils.nonNull(referenceContext);
Utils.nonNull(featureContext);

// Query this funcotation factory to get the list of overlapping features.
final List<Feature> featureList = queryFeaturesFromFeatureContext(featureContext);

final List<Funcotation> outputFuncotations;

// Query this funcotation factory to get the list of overlapping features.
// NOTE: This will only get features that are LOCATABLE!
// This corresponds to requiresFeatures() returning `True`.
final List<Feature> featureList = requiresFeatures() ?
queryFeaturesFromFeatureContext(featureContext) :
Collections.emptyList();

// If our featureList is compatible with this DataSourceFuncotationFactory, then we make our funcotations:
if ( isFeatureListCompatible(featureList) ) {

Expand Down Expand Up @@ -203,12 +213,22 @@ public List<Funcotation> createFuncotations(final VariantContext variant, final
* Checks to see if the given featureList is compatible with this {@link DataSourceFuncotationFactory}.
* Cues off of the feature type in the feature list and whether the given list contains any non-null features.
* This method acts as a sanity-check before attempting to do any annotations on features.
* If this {@link DataSourceFuncotationFactory} does not require features as per {@link #requiresFeatures()}, then
* this method will always return {@code true}.
* @param featureList {@link List} of {@link Feature} that might be applicable to this {@link DataSourceFuncotationFactory} for annotation.
* @return {@code true} if {@link #requiresFeatures()} is {@code false}, or if the given {@code featureList} contains at least one non-null feature of type {@link #getAnnotationFeatureClass()}; {@code false} otherwise.
*/
private boolean isFeatureListCompatible(final List<Feature> featureList) {
// Make sure these features can be annotated by this DataSourceFuncotationFactory:
// Make sure these features can be annotated by this DataSourceFuncotationFactory.
// NOTE: We only check the first non-null element of the list for feature type:

// The feature list is compatible if we found a compatible feature
// OR
// if this DataSourceFuncotationFactory does not require features.
if ( !requiresFeatures() ) {
return true;
}

boolean foundCompatibleFeature = false;
for ( final Feature f : featureList ) {
if (f != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,15 @@ public String getName() {
return name;
}

/**
 * {@inheritDoc}
 * Since {@link CosmicFuncotationFactory} primarily keys off a gene name, we don't actually need
 * any features to create annotations.
 *
 * @return {@code false} always, because this factory does not need features to create annotations.
 */
@Override
@VisibleForTesting
public boolean requiresFeatures() { return false; }

@Override
public LinkedHashSet<String> getSupportedFuncotationFields() {
return supportedFields;
Expand Down Expand Up @@ -250,7 +259,7 @@ protected List<Funcotation> createFuncotationsOnVariant(final VariantContext var
// Then query our DB for matches on the gene name.
// Then grab Genome position / Protein position and see if we overlap.
// If any do, we create our CosmicFuncotation
for ( final GencodeFuncotation gencodeFuncotation : gencodeFuncotations ) {
for ( final GencodeFuncotation gencodeFuncotation : gencodeFuncotations ) {
final String geneName = gencodeFuncotation.getHugoSymbol();

final SimpleInterval genomePosition = new SimpleInterval(variant.getContig(), variant.getStart(), variant.getEnd());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.broadinstitute.hellbender.tools.funcotator.dataSources.xsv;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.tribble.Feature;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
Expand Down Expand Up @@ -177,6 +178,15 @@ public String getName() {
return name;
}

/**
 * {@inheritDoc}
 * Since {@link SimpleKeyXsvFuncotationFactory} keys off a gene name or transcript ID, we don't actually need
 * any features to create annotations.
 *
 * @return {@code false} always, because this factory does not need features to create annotations.
 */
@Override
@VisibleForTesting
public boolean requiresFeatures() { return false; }

@Override
public LinkedHashSet<String> getSupportedFuncotationFields() {
return new LinkedHashSet<>(annotationColumnNames);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ public class FuncotatorIntegrationTest extends CommandLineProgramTest {
private static final String PIK3CA_DBSNP_DS = toolsTestDir + "funcotator" + File.separator + "small_pik3ca_dbsnp_ds";
private static final String MAF_DBSNP_TEST = toolsTestDir + "funcotator" + File.separator + "maf_dbsnp_test_input.vcf";

// Non-locatable funcotation file:
private static final String NON_LOCATABLE_FUNCOTATED_INPUT_VCF = toolsTestDir + "funcotator" + File.separator + "non_locatable_proof_input.vcf";

private static final List<String> VCF_FIELDS_GENCODE_19_DS = Arrays.asList("Gencode_19_hugoSymbol","Gencode_19_ncbiBuild","Gencode_19_chromosome","Gencode_19_start","Gencode_19_end","Gencode_19_variantClassification","Gencode_19_variantType","Gencode_19_refAllele","Gencode_19_tumorSeqAllele1","Gencode_19_tumorSeqAllele2","Gencode_19_genomeChange","Gencode_19_annotationTranscript","Gencode_19_transcriptStrand","Gencode_19_transcriptExon","Gencode_19_transcriptPos","Gencode_19_cDnaChange","Gencode_19_codonChange","Gencode_19_proteinChange","Gencode_19_gcContent","Gencode_19_referenceContext","Gencode_19_otherTranscripts");//,"Achilles_Top_Genes","CGC_Name","CGC_GeneID","CGC_Chr","CGC_Chr_Band","CGC_Cancer_Somatic_Mut","CGC_Cancer_Germline_Mut","CGC_Tumour_Types__(Somatic_Mutations)","CGC_Tumour_Types_(Germline_Mutations)","CGC_Cancer_Syndrome","CGC_Tissue_Type","CGC_Cancer_Molecular_Genetics","CGC_Mutation_Type","CGC_Translocation_Partner","CGC_Other_Germline_Mut","CGC_Other_Syndrome/Disease","ClinVar_HGMD_ID","ClinVar_SYM","ClinVar_TYPE","ClinVar_ASSEMBLY","ClinVar_rs","Cosmic_overlapping_mutations","CosmicFusion_fusion_genes","CosmicFusion_fusion_id","CosmicTissue_total_alterations_in_gene","CosmicTissue_tissue_types_affected","DNARepairGenes_Activity_linked_to_OMIM","DNARepairGenes_Chromosome_location_linked_to_NCBI_MapView","DNARepairGenes_Accession_number_linked_to_NCBI_Entrez","Familial_Cancer_Genes_Syndrome","Familial_Cancer_Genes_Synonym","Familial_Cancer_Genes_Reference","Gencode_XHGNC_hgnc_id","Gencode_XRefSeq_mRNA_id","Gencode_XRefSeq_prot_acc","HGNC_HGNC_ID","HGNC_Approved_Name","HGNC_Status","HGNC_Locus_Type","HGNC_Locus_Group","HGNC_Previous_Symbols","HGNC_Previous_Name","HGNC_Synonyms","HGNC_Name_Synonyms","HGNC_Chromosome","HGNC_Date_Modified","HGNC_Date_Symbol_Changed","HGNC_Date_Name_Changed","HGNC_Accession_Numbers","HGNC_Enzyme_IDs","HGNC_Entrez_Gene_ID","HGNC_Ensembl_Gene_ID","HGNC_Pubmed_IDs","HGNC_RefSeq_IDs","HGNC_Gene_Family_ID","HGNC_Gene_Fam
ily_Name","HGNC_CCDS_IDs","HGNC_Vega_ID","HGNC_Entrez_Gene_ID(supplied_by_NCBI)","HGNC_OMIM_ID(supplied_by_OMIM)","HGNC_RefSeq(supplied_by_NCBI)","HGNC_UniProt_ID(supplied_by_UniProt)","HGNC_Ensembl_ID(supplied_by_Ensembl)","HGNC_UCSC_ID(supplied_by_UCSC)","Oreganno_Build","Oreganno_ID","Oreganno_Values","Simple_Uniprot_uniprot_entry_name","Simple_Uniprot_DrugBank","Simple_Uniprot_alt_uniprot_accessions","Simple_Uniprot_uniprot_accession","Simple_Uniprot_GO_Biological_Process","Simple_Uniprot_GO_Cellular_Component","Simple_Uniprot_GO_Molecular_Function","dbSNP_ASP","dbSNP_ASS","dbSNP_CAF","dbSNP_CDA","dbSNP_CFL","dbSNP_COMMON","dbSNP_DSS","dbSNP_G5","dbSNP_G5A","dbSNP_GENEINFO","dbSNP_GNO","dbSNP_HD","dbSNP_INT","dbSNP_KGPhase1","dbSNP_KGPhase3","dbSNP_LSD","dbSNP_MTP","dbSNP_MUT","dbSNP_NOC","dbSNP_NOV","dbSNP_NSF","dbSNP_NSM","dbSNP_NSN","dbSNP_OM","dbSNP_OTH","dbSNP_PM","dbSNP_PMC","dbSNP_R3","dbSNP_R5","dbSNP_REF","dbSNP_RS","dbSNP_RSPOS","dbSNP_RV","dbSNP_S3D","dbSNP_SAO","dbSNP_SLO","dbSNP_SSR","dbSNP_SYN","dbSNP_TOPMED","dbSNP_TPA","dbSNP_U3","dbSNP_U5","dbSNP_VC","dbSNP_VLD","dbSNP_VP","dbSNP_WGT","dbSNP_WTD","dbSNP_dbSNPBuildID");
private static final List<String> VCF_FIELDS_GENCODE_28_DS = Arrays.asList("Gencode_28_hugoSymbol","Gencode_28_ncbiBuild","Gencode_28_chromosome","Gencode_28_start","Gencode_28_end","Gencode_28_variantClassification","Gencode_28_variantType","Gencode_28_refAllele","Gencode_28_tumorSeqAllele1","Gencode_28_tumorSeqAllele2","Gencode_28_genomeChange","Gencode_28_annotationTranscript","Gencode_28_transcriptStrand","Gencode_28_transcriptExon","Gencode_28_transcriptPos","Gencode_28_cDnaChange","Gencode_28_codonChange","Gencode_28_proteinChange","Gencode_28_gcContent","Gencode_28_referenceContext","Gencode_28_otherTranscripts");//,"Achilles_Top_Genes","CGC_Name","CGC_GeneID","CGC_Chr","CGC_Chr_Band","CGC_Cancer_Somatic_Mut","CGC_Cancer_Germline_Mut","CGC_Tumour_Types__(Somatic_Mutations)","CGC_Tumour_Types_(Germline_Mutations)","CGC_Cancer_Syndrome","CGC_Tissue_Type","CGC_Cancer_Molecular_Genetics","CGC_Mutation_Type","CGC_Translocation_Partner","CGC_Other_Germline_Mut","CGC_Other_Syndrome/Disease","ClinVar_HGMD_ID","ClinVar_SYM","ClinVar_TYPE","ClinVar_ASSEMBLY","ClinVar_rs","Cosmic_overlapping_mutations","CosmicFusion_fusion_genes","CosmicFusion_fusion_id","CosmicTissue_total_alterations_in_gene","CosmicTissue_tissue_types_affected","DNARepairGenes_Activity_linked_to_OMIM","DNARepairGenes_Chromosome_location_linked_to_NCBI_MapView","DNARepairGenes_Accession_number_linked_to_NCBI_Entrez","Familial_Cancer_Genes_Syndrome","Familial_Cancer_Genes_Synonym","Familial_Cancer_Genes_Reference","Gencode_XHGNC_hgnc_id","Gencode_XRefSeq_mRNA_id","Gencode_XRefSeq_prot_acc","HGNC_HGNC_ID","HGNC_Approved_Name","HGNC_Status","HGNC_Locus_Type","HGNC_Locus_Group","HGNC_Previous_Symbols","HGNC_Previous_Name","HGNC_Synonyms","HGNC_Name_Synonyms","HGNC_Chromosome","HGNC_Date_Modified","HGNC_Date_Symbol_Changed","HGNC_Date_Name_Changed","HGNC_Accession_Numbers","HGNC_Enzyme_IDs","HGNC_Entrez_Gene_ID","HGNC_Ensembl_Gene_ID","HGNC_Pubmed_IDs","HGNC_RefSeq_IDs","HGNC_Gene_Family_ID","HGNC_Gene_Fam
ily_Name","HGNC_CCDS_IDs","HGNC_Vega_ID","HGNC_Entrez_Gene_ID(supplied_by_NCBI)","HGNC_OMIM_ID(supplied_by_OMIM)","HGNC_RefSeq(supplied_by_NCBI)","HGNC_UniProt_ID(supplied_by_UniProt)","HGNC_Ensembl_ID(supplied_by_Ensembl)","HGNC_UCSC_ID(supplied_by_UCSC)","Oreganno_Build","Oreganno_ID","Oreganno_Values","Simple_Uniprot_uniprot_entry_name","Simple_Uniprot_DrugBank","Simple_Uniprot_alt_uniprot_accessions","Simple_Uniprot_uniprot_accession","Simple_Uniprot_GO_Biological_Process","Simple_Uniprot_GO_Cellular_Component","Simple_Uniprot_GO_Molecular_Function","dbSNP_ASP","dbSNP_ASS","dbSNP_CAF","dbSNP_CDA","dbSNP_CFL","dbSNP_COMMON","dbSNP_DSS","dbSNP_G5","dbSNP_G5A","dbSNP_GENEINFO","dbSNP_GNO","dbSNP_HD","dbSNP_INT","dbSNP_KGPhase1","dbSNP_KGPhase3","dbSNP_LSD","dbSNP_MTP","dbSNP_MUT","dbSNP_NOC","dbSNP_NOV","dbSNP_NSF","dbSNP_NSM","dbSNP_NSN","dbSNP_OM","dbSNP_OTH","dbSNP_PM","dbSNP_PMC","dbSNP_R3","dbSNP_R5","dbSNP_REF","dbSNP_RS","dbSNP_RSPOS","dbSNP_RV","dbSNP_S3D","dbSNP_SAO","dbSNP_SLO","dbSNP_SSR","dbSNP_SYN","dbSNP_TOPMED","dbSNP_TPA","dbSNP_U3","dbSNP_U5","dbSNP_VC","dbSNP_VLD","dbSNP_VP","dbSNP_WGT","dbSNP_WTD","dbSNP_dbSNPBuildID");
private static final List<String> MAF_FIELDS_GENCODE_DS = Arrays.asList(MafOutputRendererConstants.FieldName_Hugo_Symbol, MafOutputRendererConstants.FieldName_NCBI_Build, MafOutputRendererConstants.FieldName_Chromosome,
Expand Down Expand Up @@ -1677,5 +1680,57 @@ public void testEnsureDbSnpInMaf() {
.collect(Collectors.toList());
Assert.assertEquals(guessDbSnpValStatus, gtDbSnpValStatus);
}

/**
 * Runs the tool end-to-end in VCF mode on {@code NON_LOCATABLE_FUNCOTATED_INPUT_VCF} and checks that the
 * first output variant carries the expected HGNC fields on its single annotated transcript.
 */
@Test
public void testCanCreateNonLocatableFuncotations() {

    // ---- Run the tool ----
    final File funcotatedVcf = createTempFile(
            tmpOutDir + File.separator + NON_LOCATABLE_FUNCOTATED_INPUT_VCF + ".funcotator",
            ".vcf");

    final ArgumentsBuilder toolArgs = createBaselineArgumentsForFuncotator(
            NON_LOCATABLE_FUNCOTATED_INPUT_VCF,
            funcotatedVcf,
            b37Reference,
            FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER,
            FuncotatorTestConstants.REFERENCE_VERSION_HG19,
            FuncotatorArgumentDefinitions.OutputFormatType.VCF,
            true);

    runCommandLine(toolArgs);

    // ---- Read the output back and decode the funcotation attribute ----
    final Pair<VCFHeader, List<VariantContext>> headerAndVariants =
            VariantContextTestUtils.readEntireVCFIntoMemory(funcotatedVcf.getAbsolutePath());

    final VCFInfoHeaderLine funcotationInfoLine =
            headerAndVariants.getLeft().getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME);
    final String[] funcotationKeys =
            FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotationInfoLine.getDescription());

    // Only the first variant in the output is inspected.
    final VariantContext firstVariant = headerAndVariants.getRight().get(0);

    final Map<Allele, FuncotationMap> funcotationsByAllele =
            FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute(
                    funcotationKeys,
                    firstVariant,
                    "Gencode_19_annotationTranscript",
                    "TEST");

    final Allele firstAltAllele = firstVariant.getAlternateAllele(0);
    final FuncotationMap altAlleleFuncotations = funcotationsByAllele.get(firstAltAllele);

    // Exactly one transcript should have been annotated for the alternate allele:
    Assert.assertEquals(altAlleleFuncotations.getTranscriptList().size(), 1);

    // ---- Check a few HGNC (non-locatable data source) fields on that transcript ----
    final List<Funcotation> transcriptFuncotations = altAlleleFuncotations.get("ENST00000379410.3");
    final Funcotation firstFuncotation = transcriptFuncotations.get(0);

    Assert.assertEquals(firstFuncotation.getField("HGNC_HGNC_ID"), "HGNC:25284");
    Assert.assertEquals(firstFuncotation.getField("HGNC_Status"), "Approved");
    Assert.assertEquals(firstFuncotation.getField("HGNC_Locus_Type"), "gene_%20_with_%20_protein_%20_product");
    Assert.assertEquals(firstFuncotation.getField("HGNC_Locus_Group"), "protein-coding_%20_gene");
    Assert.assertEquals(firstFuncotation.getField("HGNC_Previous_Name"), "\"pleckstrin_%20_homology_%20_domain_%20_containing_%2C__%20_family_%20_N_%20_member_%20_1\"");
    Assert.assertEquals(firstFuncotation.getField("HGNC_Synonyms"), "DKFZP434H2010");
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,11 @@ private Object[][] provideForTestCreateFuncotations() {
//==================================================================================================================
// Tests:

/** Checks that a {@link CosmicFuncotationFactory} reports that it does not require features. */
@Test
public void testRequiresFeatures() {
    final CosmicFuncotationFactory cosmicFactory = new CosmicFuncotationFactory(PATH_TO_TEST_DB);
    final boolean needsFeatures = cosmicFactory.requiresFeatures();

    Assert.assertFalse(needsFeatures);
}

@Test(dataProvider = "provideDataForTestProteinPositionRegex")
public void testPositionRegex(final Pattern regex, final List<String> dbPositions, final int expectedNumResults) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2503,4 +2503,9 @@ public void testCreateFuncotationsWithFlanks(final String expectedGeneName,
Assert.assertEquals(funcotation.getHugoSymbol(), expectedGeneName, "Gene name not correct");
}
}

/** Checks that the shared MUC16 test factory reports that it requires features. */
@Test
public void testRequiresFeatures() {
    final boolean needsFeatures = testMuc16SnpCreateFuncotationsFuncotationFactory.requiresFeatures();

    Assert.assertTrue(needsFeatures);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,11 @@ public void testGetAnnotationFeatureClass() {
Assert.assertEquals(vcfFuncotationFactory.getAnnotationFeatureClass(), VariantContext.class);
}

/** Checks that a {@link VcfFuncotationFactory} reports that it requires features. */
@Test
public void testRequiresFeatures() {
    final VcfFuncotationFactory vcfFuncotationFactory = createVcfFuncotationFactory(
            FACTORY_NAME,
            FACTORY_VERSION,
            IOUtils.getPath(FuncotatorTestConstants.VARIANT_FILE_HG19_CHR3));

    Assert.assertTrue(vcfFuncotationFactory.requiresFeatures());
}

@Test
public void testGetType() {
final VcfFuncotationFactory vcfFuncotationFactory = createVcfFuncotationFactory(FACTORY_NAME, FACTORY_VERSION, IOUtils.getPath(FuncotatorTestConstants.VARIANT_FILE_HG19_CHR3));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,11 @@ private Object[][] provideForTestSetSupportedFuncotationFields() {
//==================================================================================================================
// Tests:

/** Checks that a {@link LocatableXsvFuncotationFactory} reports that it requires features. */
@Test
public void testRequiresFeatures() {
    final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory = new LocatableXsvFuncotationFactory(
            LocatableXsvFuncotationFactory.DEFAULT_NAME,
            DataSourceFuncotationFactory.DEFAULT_VERSION_STRING,
            new LinkedHashMap<>(),
            null);

    Assert.assertTrue(locatableXsvFuncotationFactory.requiresFeatures());
}

@Test(dataProvider = "provideForTestGetName")
public void testGetName(final String name, final String expected) {
final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.ReferenceDataSource;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.testutils.FuncotatorReferenceTestUtils;
import org.broadinstitute.hellbender.tools.funcotator.Funcotation;
import org.broadinstitute.hellbender.tools.funcotator.FuncotatorTestConstants;
import org.broadinstitute.hellbender.tools.funcotator.dataSources.TableFuncotation;
import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation;
import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotationBuilder;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.testutils.FuncotatorReferenceTestUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
Expand Down Expand Up @@ -425,4 +425,18 @@ public void testCreateFuncotationsIgnoresTranscriptVersions() {
final TableFuncotation tableFuncotation = (TableFuncotation)funcotation;
Assert.assertEquals(tableFuncotation.get(defaultName + "_Beatle"), "Harrison", "Wrong value for the Beatle column in returned funcotation");
}

/** Checks that a gene-name keyed {@link SimpleKeyXsvFuncotationFactory} reports that it does not require features. */
@Test
public void testRequiresFeatures() {
    final boolean needsFeatures = new SimpleKeyXsvFuncotationFactory(
            defaultName,
            IOUtils.getPath(FuncotatorTestConstants.XSV_CSV_FILE_PATH),
            "VERSION",
            ",",
            0,
            SimpleKeyXsvFuncotationFactory.XsvDataKeyType.GENE_NAME
    ).requiresFeatures();

    Assert.assertFalse(needsFeatures);
}
}
Loading