Skip to content

Commit

Permalink
LocatableXsv data sources now produce at most 1 funcotation per allele pair. (#4936)
Browse files Browse the repository at this point in the history

Fixes #4929
  • Loading branch information
jonn-smith authored Jun 21, 2018
1 parent fb4c7a1 commit 02f3cf7
Show file tree
Hide file tree
Showing 4 changed files with 269 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ protected List<Funcotation> createFuncotationsOnVariant(final VariantContext var
annotatedAltAlleles.add(altAllele);
}
}

// TODO: Must break the loop now to prevent multiple entries messing up the number of fields in the funcotation (issue #4930 - https://github.com/broadinstitute/gatk/issues/4930)
break;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,19 @@ public class FuncotatorIntegrationTest extends CommandLineProgramTest {
private static final boolean doDebugTests = false;
private static final String LARGE_DATASOURCES_FOLDER = "funcotator_dataSources_latest";

private static final String PIK3CA_VCF_HG19 = toolsTestDir + "funcotator" + File.separator + "0816201804HC0_R01C01.pik3ca.vcf";
private static final String PIK3CA_VCF_HG38 = toolsTestDir + "funcotator" + File.separator + "hg38_trio.pik3ca.vcf";
private static final String PIK3CA_VCF_HG19_SNPS = toolsTestDir + "funcotator" + File.separator + "PIK3CA_SNPS_3.vcf";
private static final String PIK3CA_VCF_HG19_INDELS = toolsTestDir + "funcotator" + File.separator + "PIK3CA_INDELS_3.vcf";
private static final String MUC16_VCF_HG19 = toolsTestDir + "funcotator" + File.separator + "MUC16_MNP.vcf";
private static final String PIK3CA_VCF_HG19_ALTS = toolsTestDir + "funcotator" + File.separator + "PIK3CA_3_miss_clinvar_alt_only.vcf";
private static final String SPANNING_DEL_VCF = toolsTestDir + "funcotator" + File.separator + "spanning_del.vcf";
private static final String DS_PIK3CA_DIR = largeFileTestDir + "funcotator" + File.separator + "small_ds_pik3ca" + File.separator;
private static final String DS_MUC16_DIR = largeFileTestDir + "funcotator" + File.separator + "small_ds_muc16" + File.separator;
private static final String MAF_TEST_CONFIG = toolsTestDir + "funcotator" + File.separator + "maf.config";
private static final String XSV_CLINVAR_COL_TEST_VCF = toolsTestDir + "funcotator" + File.separator + "clinvar_hg19_column_test.vcf";
private static final String DS_XSV_CLINVAR_COL_TEST = largeFileTestDir + "funcotator" + File.separator + "small_ds_clinvar_hg19" + File.separator;
private static final String PIK3CA_VCF_HG19 = toolsTestDir + "funcotator" + File.separator + "0816201804HC0_R01C01.pik3ca.vcf";
private static final String PIK3CA_VCF_HG38 = toolsTestDir + "funcotator" + File.separator + "hg38_trio.pik3ca.vcf";
private static final String PIK3CA_VCF_HG19_SNPS = toolsTestDir + "funcotator" + File.separator + "PIK3CA_SNPS_3.vcf";
private static final String PIK3CA_VCF_HG19_INDELS = toolsTestDir + "funcotator" + File.separator + "PIK3CA_INDELS_3.vcf";
private static final String MUC16_VCF_HG19 = toolsTestDir + "funcotator" + File.separator + "MUC16_MNP.vcf";
private static final String PIK3CA_VCF_HG19_ALTS = toolsTestDir + "funcotator" + File.separator + "PIK3CA_3_miss_clinvar_alt_only.vcf";
private static final String SPANNING_DEL_VCF = toolsTestDir + "funcotator" + File.separator + "spanning_del.vcf";
private static final String DS_PIK3CA_DIR = largeFileTestDir + "funcotator" + File.separator + "small_ds_pik3ca" + File.separator;
private static final String DS_MUC16_DIR = largeFileTestDir + "funcotator" + File.separator + "small_ds_muc16" + File.separator;
private static final String MAF_TEST_CONFIG = toolsTestDir + "funcotator" + File.separator + "maf.config";
private static final String XSV_CLINVAR_COL_TEST_VCF = toolsTestDir + "funcotator" + File.separator + "clinvar_hg19_column_test.vcf";
private static final String XSV_CLINVAR_MULTIHIT_TEST_VCF = toolsTestDir + "funcotator" + File.separator + "clinvar_hg19_multihit_test.vcf";
private static final String DS_XSV_CLINVAR_TESTS = largeFileTestDir + "funcotator" + File.separator + "small_ds_clinvar_hg19" + File.separator;

private static String hg38Chr3Ref;
private static String b37Chr3Ref;
Expand Down Expand Up @@ -433,17 +434,35 @@ public void testCanAnnotateMixedContigHg19Clinvar() {
.count(), NUM_CLINVAR_HITS);
}


/**
 * Checks that the Funcotator annotation on every variant in the given VCF contents is non-empty,
 * contains the expected number of field delimiters, and can be turned into a {@link FuncotationMap}.
 *
 * @param vcfInfo               pair of VCF header and variants; only the variant list (right side) is inspected.
 * @param funcotationFieldNames names of the funcotation fields; each funcotation string is expected to
 *                              contain exactly {@code funcotationFieldNames.length - 1} field delimiters.
 */
private void checkVariantContextFuncotationResultsForParseability(final Pair<VCFHeader, List<VariantContext>> vcfInfo, final String[] funcotationFieldNames) {
    for (final VariantContext variant : vcfInfo.getRight()) {
        final String funcotationAttribute = variant.getAttributeAsString(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME, "");
        Assert.assertNotEquals(funcotationAttribute, "", "Funcotation string is empty!");

        // Drop the first and last characters (presumably the enclosing brackets of the
        // funcotation -- confirm against VcfOutputRenderer) before counting delimiters.
        final String unwrappedFuncotation = funcotationAttribute.substring(1, funcotationAttribute.length() - 1);
        final int observedDelimiterCount = StringUtils.countMatches(unwrappedFuncotation, VcfOutputRenderer.FIELD_DELIMITER);
        final int expectedDelimiterCount = funcotationFieldNames.length - 1;
        Assert.assertEquals(observedDelimiterCount, expectedDelimiterCount, "Found unexpected number of funcotation delimiters (indicating wrong number of funcotations)!");

        // Secondary check: building the FuncotationMap from the full attribute string must not throw.
        // The resulting map itself is not inspected.
        FuncotationMap.createAsAllTableFuncotationsFromVcf(
                FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY,
                funcotationFieldNames,
                funcotationAttribute,
                variant.getAlternateAllele(0),
                "VCF");
    }
}

@Test
public void testXsvLocatableAnnotationsHaveCorrectColsForOnlyOnePositionSpecified() {
public void testXsvLocatableAnnotationsHaveOnlyOneEntryForMultiHitLocations() {
final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF;
final File outputFile = getOutputFile(outputFormatType);

final ArgumentsBuilder arguments = new ArgumentsBuilder();

arguments.addVCF(new File(XSV_CLINVAR_COL_TEST_VCF));
arguments.addVCF(new File(XSV_CLINVAR_MULTIHIT_TEST_VCF));
arguments.addOutput(outputFile);
arguments.addReference(new File(b37Chr2Ref));
arguments.addArgument(FuncotatorArgumentDefinitions.DATA_SOURCES_PATH_LONG_NAME, DS_XSV_CLINVAR_COL_TEST);
arguments.addArgument(FuncotatorArgumentDefinitions.DATA_SOURCES_PATH_LONG_NAME, DS_XSV_CLINVAR_TESTS);
arguments.addArgument(FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, FuncotatorTestConstants.REFERENCE_VERSION_HG19);
arguments.addArgument(FuncotatorArgumentDefinitions.OUTPUT_FORMAT_LONG_NAME, outputFormatType.toString());
arguments.addBooleanArgument(FuncotatorArgumentDefinitions.ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_LONG_NAME, true);
Expand All @@ -455,23 +474,38 @@ public void testXsvLocatableAnnotationsHaveCorrectColsForOnlyOnePositionSpecifie

final String[] funcotationFieldNames = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription());

final int EXPECTED_NUM_VARIANTS = 10;
Assert.assertEquals(vcfInfo.getRight().size(), EXPECTED_NUM_VARIANTS);
final int EXPECTED_NUM_VARIANTS = 1;
Assert.assertEquals(vcfInfo.getRight().size(), EXPECTED_NUM_VARIANTS, "Found more than " + EXPECTED_NUM_VARIANTS + " variants!");

for (final VariantContext vc : vcfInfo.getRight() ) {
final String funcotation = vc.getAttributeAsString(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME, "");
checkVariantContextFuncotationResultsForParseability(vcfInfo, funcotationFieldNames);
}

Assert.assertNotEquals(funcotation, "");
@Test
public void testXsvLocatableAnnotationsHaveCorrectColsForOnlyOnePositionSpecified() {
final FuncotatorArgumentDefinitions.OutputFormatType outputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF;
final File outputFile = getOutputFile(outputFormatType);

final String rawFuncotations = funcotation.substring(1,funcotation.length()-1);
final ArgumentsBuilder arguments = new ArgumentsBuilder();

Assert.assertEquals(StringUtils.countMatches(rawFuncotations, VcfOutputRenderer.FIELD_DELIMITER), funcotationFieldNames.length - 1);
arguments.addVCF(new File(XSV_CLINVAR_COL_TEST_VCF));
arguments.addOutput(outputFile);
arguments.addReference(new File(b37Chr2Ref));
arguments.addArgument(FuncotatorArgumentDefinitions.DATA_SOURCES_PATH_LONG_NAME, DS_XSV_CLINVAR_TESTS);
arguments.addArgument(FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, FuncotatorTestConstants.REFERENCE_VERSION_HG19);
arguments.addArgument(FuncotatorArgumentDefinitions.OUTPUT_FORMAT_LONG_NAME, outputFormatType.toString());
arguments.addBooleanArgument(FuncotatorArgumentDefinitions.ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_LONG_NAME, true);

// This is here to make sure we can create the FuncotationMap object without exploding.
// It serves as a secondary check.
final FuncotationMap funkyMap = FuncotationMap.createAsAllTableFuncotationsFromVcf(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY, funcotationFieldNames,
funcotation, vc.getAlternateAllele(0), "VCF");
}
runCommandLine(arguments);

final Pair<VCFHeader, List<VariantContext>> vcfInfo = VariantContextTestUtils.readEntireVCFIntoMemory(outputFile.getAbsolutePath());
final VCFInfoHeaderLine funcotationHeaderLine = vcfInfo.getLeft().getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME);

final String[] funcotationFieldNames = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription());

final int EXPECTED_NUM_VARIANTS = 10;
Assert.assertEquals(vcfInfo.getRight().size(), EXPECTED_NUM_VARIANTS);

checkVariantContextFuncotationResultsForParseability(vcfInfo, funcotationFieldNames);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,6 @@ private Object[][] provideForTestCreateFuncotations() {
Arrays.asList(null, null, null), Collections.emptyList(),
Collections.singletonList(TableFuncotation.create(reportableFieldNames, emptyFieldList, defaultAltAllele, defaultDataSourceName, null))
),
// // Trivial case where no XsvTableFeatures are in the list:
// helpProvideForTestCreateFuncotations(
// "chr3", 178866314, 178866314,
// "C", defaultAltAllele.getBaseString(), FuncotatorReferenceTestUtils.retrieveHg19Chr3Ref(),
// reportableFieldNames,
// Collections.singletonList(new DummyTestFeature("chr3", 178866314,178866314)),
// Collections.emptyList(),
// Collections.singletonList(TableFuncotation.create(reportableFieldNames, emptyFieldList, defaultAltAllele, defaultDataSourceName))
// ),
// One XsvTableFeature in list
helpProvideForTestCreateFuncotations(
"chr3", 178866314, 178866314,
Expand All @@ -214,7 +205,9 @@ private Object[][] provideForTestCreateFuncotations() {
xsvTableFeature1, xsvTableFeature2
),
Collections.emptyList(),
Arrays.asList(TableFuncotation.create(xsvTableFeature1, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature2, defaultAltAllele, defaultDataSourceName, null))
// TODO: Commented out because of issue #4930. When issue is fixed, revert to this test case! (https://github.com/broadinstitute/gatk/issues/4930)
// Arrays.asList(TableFuncotation.create(xsvTableFeature1, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature2, defaultAltAllele, defaultDataSourceName, null))
Collections.singletonList(TableFuncotation.create(xsvTableFeature1, defaultAltAllele, defaultDataSourceName, null))
),
// Many XsvTableFeatures in list
helpProvideForTestCreateFuncotations(
Expand All @@ -225,7 +218,9 @@ private Object[][] provideForTestCreateFuncotations() {
xsvTableFeature1, xsvTableFeature2, xsvTableFeature3
),
Collections.emptyList(),
Arrays.asList(TableFuncotation.create(xsvTableFeature1, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature2, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature3, defaultAltAllele, defaultDataSourceName, null))
// TODO: Commented out because of issue #4930. When issue is fixed, revert to this test case! (https://github.com/broadinstitute/gatk/issues/4930)
// Arrays.asList(TableFuncotation.create(xsvTableFeature1, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature2, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature3, defaultAltAllele, defaultDataSourceName, null))
Collections.singletonList(TableFuncotation.create(xsvTableFeature1, defaultAltAllele, defaultDataSourceName, null))
),
// Many XsvTableFeatures in list and non-empty GencodeFuncotations
helpProvideForTestCreateFuncotations(
Expand All @@ -238,7 +233,9 @@ private Object[][] provideForTestCreateFuncotations() {
Collections.singletonList(
new GencodeFuncotationBuilder().setChromosome("chr3").setStart(178866314).setEnd(178866314).build()
),
Arrays.asList(TableFuncotation.create(xsvTableFeature1, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature2, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature3, defaultAltAllele, defaultDataSourceName, null))
// TODO: Commented out because of issue #4930. When issue is fixed, revert to this test case! (https://github.com/broadinstitute/gatk/issues/4930)
// Arrays.asList(TableFuncotation.create(xsvTableFeature1, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature2, defaultAltAllele, defaultDataSourceName, null), TableFuncotation.create(xsvTableFeature3, defaultAltAllele, defaultDataSourceName, null))
Collections.singletonList(TableFuncotation.create(xsvTableFeature1, defaultAltAllele, defaultDataSourceName, null))
),
};
}
Expand Down
Loading

0 comments on commit 02f3cf7

Please sign in to comment.