Skip to content

Commit

Permalink
Align the Funcotator checkIfAlreadyAnnotated test with the Funcotator…
Browse files Browse the repository at this point in the history
… engine code. (#7555)
  • Loading branch information
cmnbroad authored Nov 22, 2021
1 parent f06971a commit eeb9e1f
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 107 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
Expand Down Expand Up @@ -826,10 +825,7 @@ public void onTraversalStart() {
*/
@VisibleForTesting
static void checkIfAlreadyAnnotated(final VCFHeader vcfHeader, GATKPath drivingVariantFile) {
if (vcfHeader.getOtherHeaderLine(FuncotatorConstants.VCF_HEADER_ALREADY_ANNOTATED_1) != null) {
throw new UserException.BadInput("Given VCF " +drivingVariantFile+ " has already been annotated!");
}
else if (vcfHeader.getOtherHeaderLine(FuncotatorConstants.VCF_HEADER_ALREADY_ANNOTATED_2) != null) {
if (vcfHeader.getOtherHeaderLine(FuncotatorConstants.FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY) != null) {
throw new UserException.BadInput("Given VCF " +drivingVariantFile+ " has already been annotated!");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,8 @@ public class FuncotatorConstants {
Arrays.asList(B37_MITOCHONDRIAL_CONTIG_NAME, HG19_MITOCHONDRIAL_CONTIG_NAME, HG38_MITOCHONDRIAL_CONTIG_NAME)
);

/** Name of one of the header line keys which indicates that a file has already been annotated. */
public static final String VCF_HEADER_ALREADY_ANNOTATED_1 = "Funcotator";

/** Name of the second header line key which indicates that a file has already been annotated. */
public static final String VCF_HEADER_ALREADY_ANNOTATED_2 = "Funcotator Version";
/** Name of the header line key which indicates that a file has already been annotated. */
public static final String FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY = "Funcotator Version";


}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.broadinstitute.hellbender.tools.funcotator.vcfOutput;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
Expand Down Expand Up @@ -281,7 +280,7 @@ private VCFHeader createVCFHeader() {

// Add in the lines about Funcotations:
headerLines.addAll(defaultToolVcfHeaderLines);
headerLines.add(new VCFHeaderLine("Funcotator Version", toolVersion + " | " + getDataSourceInfoString()));
headerLines.add(new VCFHeaderLine(FuncotatorConstants.FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY, toolVersion + " | " + getDataSourceInfoString()));
headerLines.add(new VCFInfoHeaderLine(FUNCOTATOR_VCF_FIELD_NAME, VCFHeaderLineCount.A,
VCFHeaderLineType.String, "Functional annotation from the Funcotator tool. Funcotation fields are"
+ DESCRIPTION_PREAMBLE_DELIMITER +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@
import htsjdk.variant.vcf.VCFHeaderLine;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/**
Expand All @@ -18,114 +15,36 @@
*/
public class FuncotatorUnitTest extends GATKBaseTest {

/**
* Data provider for the unit test for checkIfAlreadyAnnotated. In this case, we have a
* "Funcotator" column in the vcf header, which indicates
* that this vcf has already been annotated and thus the program
* should not run on this input.
*
*/
@DataProvider
Object[][] provideDataForAnnotationCheckWrong() {
final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();

headerLines.add(new VCFHeaderLine("fileformat", "fileformat"));
headerLines.add(new VCFHeaderLine("FILTER", "FILTER"));
headerLines.add(new VCFHeaderLine("FORMAT", "FORMAT"));
headerLines.add(new VCFHeaderLine("Funcotator", "Funcotator"));
headerLines.add(new VCFHeaderLine("GATKCommandLine", "GATKCommandLine"));
headerLines.add(new VCFHeaderLine("INFO", "INFO"));

return new Object[][] {
{ headerLines }
};
}

/**
* Data provider for the unit test for checkIfAlreadyAnnotated. In this case, we have a
* "Funcotator Version" column in the vcf header, which indicates
* that this vcf has already been annotated and thus the program
* should not run on this input.
*
*/
@DataProvider
Object[][] provideDataForAnnotationCheckWrongSecond() {
final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();

headerLines.add(new VCFHeaderLine("fileformat", "fileformat"));
headerLines.add(new VCFHeaderLine("FILTER", "FILTER"));
headerLines.add(new VCFHeaderLine("FORMAT", "FORMAT"));
headerLines.add(new VCFHeaderLine("Funcotator Version", "Funcotator Version"));
headerLines.add(new VCFHeaderLine("GATKCommandLine", "GATKCommandLine"));
headerLines.add(new VCFHeaderLine("INFO", "INFO"));

return new Object[][] {
{ headerLines }
};
}


/**
* Data provider for the unit test of our annotation function when the
* given vcf file has not already been annotated. In this case, we see that
* there is no longer a column labelled "Funcotator", and all other columns
* are typically of a vcf file which has not been annotated yet. Thus,
* the function should not throw an error and we should annotate this file.
*
*/
@DataProvider
Object[][] provideDataForAnnotationCheckRight() {
final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(); //= new List<>();
//return Arrays.asList(
headerLines.add(new VCFHeaderLine("fileformat", "VCF"));
headerLines.add(new VCFHeaderLine("FILTER", "FILTER"));
headerLines.add(new VCFHeaderLine("FORMAT", "FORMAT"));
headerLines.add(new VCFHeaderLine("GATKCommandLine", "GATKCommandLine"));
headerLines.add(new VCFHeaderLine("INFO", "INFO"));

return new Object[][] {
{ headerLines }
};
}


/**
* Unit test to check if our checkIfAlreadyAnnotated function works correctly.
* In this case, we are using the data provider which has an incorrect field name
* in the vcf header, and thus, this function should throw an error because the given
* vcf file has already been annotated and we do not need to annotate again.
*
*/
@Test(dataProvider = "provideDataForAnnotationCheckWrong", expectedExceptions = UserException.BadInput.class)
public void testCheckWhenAlreadyAnnotated(final Set<VCFHeaderLine> vcfHeaderLines) {
final VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines);
Funcotator.checkIfAlreadyAnnotated(vcfHeader, null);
}

/**
* Unit test to check if our checkIfAlreadyAnnotated function works correctly.
* In this case, we are using the data provider which has the second incorrect field name
* In this case, we are using a header which does not have the
* {@link FuncotatorConstants#FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY}
* in the vcf header, and thus, this function should throw an error because the given
* vcf file has already been annotated and we do not need to annotate again.
*
*/
@Test(dataProvider = "provideDataForAnnotationCheckWrongSecond", expectedExceptions = UserException.BadInput.class)
public void testCheckWhenAlreadyAnnotatedSecond(final Set<VCFHeaderLine> vcfHeaderLines) {
@Test(expectedExceptions = UserException.BadInput.class)
public void testCheckWhenAlreadyAnnotated() {
final Set<VCFHeaderLine> vcfHeaderLines = new LinkedHashSet<>();
vcfHeaderLines.add(new VCFHeaderLine(
FuncotatorConstants.FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY,
"content not relevant for this test"));
final VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines);
Funcotator.checkIfAlreadyAnnotated(vcfHeader, null);
}

/**
* Unit test to check if our checkIfAlreadyAnnotated function works correctly.
* In this case, we are using the data provider which does not have any incorrect
* field names in the given vcf header, and thus, this function should
* In this case, we are using the data provider which has the
* {@link FuncotatorConstants#FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY} header line
* in the given vcf header, and thus, this function should
* not throw an error because the given vcf file has not been annotated yet
* and this file should be annotated.
*
*/
@Test(dataProvider = "provideDataForAnnotationCheckRight")
public void testCheckWhenNotAlreadyAnnotated(final Set<VCFHeaderLine> vcfHeaderLines) {
final VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines);
@Test
public void testCheckWhenNotAlreadyAnnotated() {
final VCFHeader vcfHeader = new VCFHeader(new LinkedHashSet<>());
Funcotator.checkIfAlreadyAnnotated(vcfHeader, null);
//this should not throw an exception.
}
Expand Down

0 comments on commit eeb9e1f

Please sign in to comment.