Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Align the Funcotator checkIfAlreadyAnnotated test with the Funcotator engine code. #7555

Merged
merged 1 commit into from
Nov 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
Expand Down Expand Up @@ -826,10 +825,7 @@ public void onTraversalStart() {
*/
@VisibleForTesting
static void checkIfAlreadyAnnotated(final VCFHeader vcfHeader, GATKPath drivingVariantFile) {
if (vcfHeader.getOtherHeaderLine(FuncotatorConstants.VCF_HEADER_ALREADY_ANNOTATED_1) != null) {
throw new UserException.BadInput("Given VCF " +drivingVariantFile+ " has already been annotated!");
}
else if (vcfHeader.getOtherHeaderLine(FuncotatorConstants.VCF_HEADER_ALREADY_ANNOTATED_2) != null) {
if (vcfHeader.getOtherHeaderLine(FuncotatorConstants.FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY) != null) {
throw new UserException.BadInput("Given VCF " +drivingVariantFile+ " has already been annotated!");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,8 @@ public class FuncotatorConstants {
Arrays.asList(B37_MITOCHONDRIAL_CONTIG_NAME, HG19_MITOCHONDRIAL_CONTIG_NAME, HG38_MITOCHONDRIAL_CONTIG_NAME)
);

/** Name of one of the header line keys which indicates that a file has already been annotated. */
public static final String VCF_HEADER_ALREADY_ANNOTATED_1 = "Funcotator";

/** Name of the second header line key which indicates that a file has already been annotated. */
public static final String VCF_HEADER_ALREADY_ANNOTATED_2 = "Funcotator Version";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe there may have been a good reason for checking both of these -- see the original PR for context (#7349). @jonn-smith can confirm one way or another when he returns.

Copy link
Collaborator Author

@cmnbroad cmnbroad Nov 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, but I can find neither a file that contains the other line, nor code that sets it. Maybe its a legacy thing, but if not I just need to know where the code is that creates the first header line (plain "Funcotator").

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that there is necessarily a good reason for this check. I think the updates are good.

/** Name of the header line key which indicates that a file has already been annotated. */
public static final String FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY = "Funcotator Version";


}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.broadinstitute.hellbender.tools.funcotator.vcfOutput;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
Expand Down Expand Up @@ -281,7 +280,7 @@ private VCFHeader createVCFHeader() {

// Add in the lines about Funcotations:
headerLines.addAll(defaultToolVcfHeaderLines);
headerLines.add(new VCFHeaderLine("Funcotator Version", toolVersion + " | " + getDataSourceInfoString()));
headerLines.add(new VCFHeaderLine(FuncotatorConstants.FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY, toolVersion + " | " + getDataSourceInfoString()));
headerLines.add(new VCFInfoHeaderLine(FUNCOTATOR_VCF_FIELD_NAME, VCFHeaderLineCount.A,
VCFHeaderLineType.String, "Functional annotation from the Funcotator tool. Funcotation fields are"
+ DESCRIPTION_PREAMBLE_DELIMITER +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@
import htsjdk.variant.vcf.VCFHeaderLine;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/**
Expand All @@ -18,114 +15,36 @@
*/
public class FuncotatorUnitTest extends GATKBaseTest {

/**
* Data provider for the unit test for checkIfAlreadyAnnotated. In this case, we have a
* "Funcotator" column in the vcf header, which indicates
* that this vcf has already been annotated and thus the program
* should not run on this input.
*
*/
@DataProvider
Object[][] provideDataForAnnotationCheckWrong() {
final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();

headerLines.add(new VCFHeaderLine("fileformat", "fileformat"));
headerLines.add(new VCFHeaderLine("FILTER", "FILTER"));
headerLines.add(new VCFHeaderLine("FORMAT", "FORMAT"));
headerLines.add(new VCFHeaderLine("Funcotator", "Funcotator"));
headerLines.add(new VCFHeaderLine("GATKCommandLine", "GATKCommandLine"));
headerLines.add(new VCFHeaderLine("INFO", "INFO"));

return new Object[][] {
{ headerLines }
};
}

/**
* Data provider for the unit test for checkIfAlreadyAnnotated. In this case, we have a
* "Funcotator Version" column in the vcf header, which indicates
* that this vcf has already been annotated and thus the program
* should not run on this input.
*
*/
@DataProvider
Object[][] provideDataForAnnotationCheckWrongSecond() {
final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>();

headerLines.add(new VCFHeaderLine("fileformat", "fileformat"));
headerLines.add(new VCFHeaderLine("FILTER", "FILTER"));
headerLines.add(new VCFHeaderLine("FORMAT", "FORMAT"));
headerLines.add(new VCFHeaderLine("Funcotator Version", "Funcotator Version"));
headerLines.add(new VCFHeaderLine("GATKCommandLine", "GATKCommandLine"));
headerLines.add(new VCFHeaderLine("INFO", "INFO"));

return new Object[][] {
{ headerLines }
};
}


/**
* Data provider for the unit test of our annotation function when the
* given vcf file has not already been annotated. In this case, we see that
* there is no longer a column labelled "Funcotator", and all other columns
* are typically of a vcf file which has not been annotated yet. Thus,
* the function should not throw an error and we should annotate this file.
*
*/
@DataProvider
Object[][] provideDataForAnnotationCheckRight() {
final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(); //= new List<>();
//return Arrays.asList(
headerLines.add(new VCFHeaderLine("fileformat", "VCF"));
headerLines.add(new VCFHeaderLine("FILTER", "FILTER"));
headerLines.add(new VCFHeaderLine("FORMAT", "FORMAT"));
headerLines.add(new VCFHeaderLine("GATKCommandLine", "GATKCommandLine"));
headerLines.add(new VCFHeaderLine("INFO", "INFO"));

return new Object[][] {
{ headerLines }
};
}


/**
* Unit test to check if our checkIfAlreadyAnnotated function works correctly.
* In this case, we are using the data provider which has an incorrect field name
* in the vcf header, and thus, this function should throw an error because the given
* vcf file has already been annotated and we do not need to annotate again.
*
*/
@Test(dataProvider = "provideDataForAnnotationCheckWrong", expectedExceptions = UserException.BadInput.class)
public void testCheckWhenAlreadyAnnotated(final Set<VCFHeaderLine> vcfHeaderLines) {
final VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines);
Funcotator.checkIfAlreadyAnnotated(vcfHeader, null);
}

/**
* Unit test to check if our checkIfAlreadyAnnotated function works correctly.
* In this case, we are using the data provider which has the second incorrect field name
* In this case, we are using a header which does not have the
* {@link FuncotatorConstants#FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY}
* in the vcf header, and thus, this function should throw an error because the given
* vcf file has already been annotated and we do not need to annotate again.
*
*/
@Test(dataProvider = "provideDataForAnnotationCheckWrongSecond", expectedExceptions = UserException.BadInput.class)
public void testCheckWhenAlreadyAnnotatedSecond(final Set<VCFHeaderLine> vcfHeaderLines) {
@Test(expectedExceptions = UserException.BadInput.class)
public void testCheckWhenAlreadyAnnotated() {
final Set<VCFHeaderLine> vcfHeaderLines = new LinkedHashSet<>();
vcfHeaderLines.add(new VCFHeaderLine(
FuncotatorConstants.FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY,
"content not relevant for this test"));
final VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines);
Funcotator.checkIfAlreadyAnnotated(vcfHeader, null);
}

/**
* Unit test to check if our checkIfAlreadyAnnotated function works correctly.
* In this case, we are using the data provider which does not have any incorrect
* field names in the given vcf header, and thus, this function should
* In this case, we are using the data provider which has the
* {@link FuncotatorConstants#FUNCOTATOR_VERSION_VCF_HEADERLINE_KEY} header line
* in the given vcf header, and thus, this function should
* not throw an error because the given vcf file has not been annotated yet
* and this file should be annotated.
*
*/
@Test(dataProvider = "provideDataForAnnotationCheckRight")
public void testCheckWhenNotAlreadyAnnotated(final Set<VCFHeaderLine> vcfHeaderLines) {
final VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines);
@Test
public void testCheckWhenNotAlreadyAnnotated() {
final VCFHeader vcfHeader = new VCFHeader(new LinkedHashSet<>());
Funcotator.checkIfAlreadyAnnotated(vcfHeader, null);
//this should not throw an exception.
}
Expand Down