Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ReblockGVCFs cleanup #8411

Merged
merged 2 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ public final class ReblockGVCF extends MultiVariantWalker {
// Command-line argument names for this tool. The *_LONG_NAME / *_SHORT_NAME values are the strings passed
// as fullName / shortName to the @Argument annotations on the corresponding fields.
public static final String RGQ_THRESHOLD_SHORT_NAME = "rgq-threshold";
public static final String TREE_SCORE_THRESHOLD_LONG_NAME = "tree-score-threshold-to-no-call";
// Name of the argument listing annotations to keep (backs the annotationsToKeep field).
public static final String ANNOTATIONS_TO_KEEP_LONG_NAME = "annotations-to-keep";
// Name of the argument listing FORMAT-level annotations to strip from genotypes (backs annotationsToRemove).
public static final String ANNOTATIONS_TO_REMOVE_LONG_NAME = "format-annotations-to-remove";
public static final String KEEP_ALL_ALTS_ARG_NAME = "keep-all-alts";
// Long and short names of the flag that enables QUAL approximation (backs doQualApprox).
public static final String QUAL_APPROX_LONG_NAME = "do-qual-score-approximation";
public static final String QUAL_APPROX_SHORT_NAME = "do-qual-approx";
Expand Down Expand Up @@ -154,6 +155,10 @@ public final class ReblockGVCF extends MultiVariantWalker {
// Optional list of annotation keys to keep in the output GVCF at variant sites; empty by default.
@Argument(fullName=ANNOTATIONS_TO_KEEP_LONG_NAME, doc="Annotations that are not recognized by GATK to keep, that should be kept in final GVCF at variant sites.", optional = true)
private List<String> annotationsToKeep = new ArrayList<>();

// Optional list of FORMAT keys to strip from genotypes before regenotyping; empty by default, in which
// case apply() passes each record through unchanged.
@Advanced
@Argument(fullName=ANNOTATIONS_TO_REMOVE_LONG_NAME, doc="FORMAT level annotations to remove from all genotypes in final GVCF.", optional = true)
private List<String> annotationsToRemove = new ArrayList<>();

// Optional flag, off by default, requesting the INFO annotation needed for downstream QUAL approximation.
@Advanced
@Argument(fullName=QUAL_APPROX_LONG_NAME, shortName=QUAL_APPROX_SHORT_NAME, doc="Add necessary INFO field annotation to perform QUAL approximation downstream; required for GnarlyGenotyper", optional = true)
protected boolean doQualApprox = false;
Expand Down Expand Up @@ -227,6 +232,9 @@ public void onTraversalStart() {
+ ", but the " + GATKVCFConstants.TREE_SCORE + " annotation is not present in the input GVCF.");
}

List<String> missingAnnotationsToRemove = annotationsToRemove.stream().filter(a -> inputHeader.getFormatHeaderLine(a)==null).toList();
missingAnnotationsToRemove.forEach(a -> logger.warn("FORMAT level annotation " + a + ", which was requested to be removed by --" + ANNOTATIONS_TO_REMOVE_LONG_NAME + ", not found in input GVCF header."));

final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder();

final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeaders);
Expand Down Expand Up @@ -317,7 +325,27 @@ protected void createAnnotationEngine() {
/**
 * Handles one record of the input GVCF: checks that it carries the {@code <NON_REF>} allele, strips the
 * FORMAT annotations listed in {@code annotationsToRemove} (when any were requested), and hands the
 * resulting record to {@code regenotypeVC}.
 *
 * @param variant  the record currently being traversed
 * @param reads    not used by this method
 * @param ref      not used by this method
 * @param features not used by this method
 * @throws UserException if the record has no {@code <NON_REF>} allele, i.e. the input is not a GVCF
 */
@Override
public void apply(VariantContext variant, ReadsContext reads, ReferenceContext ref, FeatureContext features) {
    final boolean hasNonRefAllele = variant.hasAllele(Allele.NON_REF_ALLELE);
    if (!hasNonRefAllele) {
        throw new UserException("Variant Context at " + variant.getContig() + ":" + variant.getStart() + " does not contain a <NON-REF> allele. This tool is only intended for use with GVCFs.");
    }

    // Only build a modified copy of the record when there is something to strip.
    final VariantContext recordToRegenotype;
    if (annotationsToRemove.isEmpty()) {
        recordToRegenotype = variant;
    } else {
        recordToRegenotype = removeVCFFormatAnnotations(variant);
    }
    regenotypeVC(recordToRegenotype);
}

/**
 * Remove format level annotations from genotype in variant context.
 *
 * Only the first genotype of {@code vc} is read, and the returned context carries that single rebuilt
 * genotype. The input context and its genotype are left untouched: removal happens on a private copy
 * of the genotype's extended-attribute map.
 *
 * @param vc variant context to remove format annotations from
 * @return variant context with format annotations removed from genotype
 */
private VariantContext removeVCFFormatAnnotations(final VariantContext vc) {
    final Genotype genotype = vc.getGenotype(0);

    // Copy before removing: the map handed back by getExtendedAttributes() belongs to the genotype, so
    // editing it in place would alter the caller's record.
    // NOTE(review): htsjdk appears to hand back a shared unmodifiable map when a genotype has no extended
    // attributes, in which case remove() on it would throw UnsupportedOperationException -- confirm.
    final Map<String, Object> keptAttributes = new java.util.HashMap<>(genotype.getExtendedAttributes());
    for (final String annotation : annotationsToRemove) {
        keptAttributes.remove(annotation);
    }

    // noAttributes() clears the extended attributes copied from the source genotype so that only the
    // filtered set is present on the rebuilt genotype.
    final Genotype newGenotype = new GenotypeBuilder(genotype).noAttributes().attributes(keptAttributes).make();
    return new VariantContextBuilder(vc).genotypes(newGenotype).make();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.CommandLineProgramTester;
Expand Down Expand Up @@ -575,4 +576,35 @@
Assert.assertFalse(filteredRefBlockVC.isFiltered()); // Ref block is unfiltered even though the input RefBlock and low qual variant were both filtered
Assert.assertEquals(filteredRefBlockVC.getGenotype(0).getDP(), 12); // Ref block is combination of filtered variant with depth 22 and filtered ref block with depth 1
}

@Test
public void testRemovingFormatAnnotations() {
final File input = getTestFile("dragen.g.vcf");
final File output = createTempFile("reblockedgvcf", ".vcf");
final String priKey = "PRI";

Check warning on line 584 in src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java

View check run for this annotation

Codecov / codecov/patch

src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java#L582-L584

Added lines #L582 - L584 were not covered by tests

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(hg38Reference))
.add("V", input)
.add(ReblockGVCF.ANNOTATIONS_TO_REMOVE_LONG_NAME, priKey)
.addOutput(output);
runCommandLine(args);

Check warning on line 591 in src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java

View check run for this annotation

Codecov / codecov/patch

src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java#L586-L591

Added lines #L586 - L591 were not covered by tests

final List<VariantContext> outVCs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight();

Check warning on line 593 in src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java

View check run for this annotation

Codecov / codecov/patch

src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java#L593

Added line #L593 was not covered by tests
for(VariantContext vc : outVCs){
Assert.assertNull(vc.getGenotype(0).getExtendedAttribute(priKey));
}
}

Check warning on line 597 in src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java

View check run for this annotation

Codecov / codecov/patch

src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java#L595-L597

Added lines #L595 - L597 were not covered by tests

@Test
public void testNonGVCFInput() {
final File output = createTempFile("reblockedgvcf", ".vcf");

Check warning on line 601 in src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java

View check run for this annotation

Codecov / codecov/patch

src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java#L601

Added line #L601 was not covered by tests

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(b37_reference_20_21))
.add("V", "src/test/resources/large/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.chr20.vcf")
.addOutput(output);

Check warning on line 606 in src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java

View check run for this annotation

Codecov / codecov/patch

src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java#L603-L606

Added lines #L603 - L606 were not covered by tests

Assert.assertThrows(GATKException.class, () -> runCommandLine(args));
}

Check warning on line 609 in src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java

View check run for this annotation

Codecov / codecov/patch

src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFIntegrationTest.java#L608-L609

Added lines #L608 - L609 were not covered by tests
}