Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes dropped AS_QD bug #6168

Merged
merged 11 commits into from
Oct 3, 2019
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_QualByDepth;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RMSMappingQuality;
import org.broadinstitute.hellbender.tools.walkers.genotyper.*;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
Expand Down Expand Up @@ -93,7 +94,8 @@ private void initialize()
}

// We only want the engine to generate the AS_QUAL key if we are using AlleleSpecific annotations.
genotypingEngine = new MinimalGenotypingEngine(createUAC(), samples, annotationEngine.isRequestedReducibleRawKey(GATKVCFConstants.AS_QUAL_KEY));
genotypingEngine = new MinimalGenotypingEngine(createUAC(), samples,
annotationEngine.getInfoAnnotations().stream().anyMatch(c -> c instanceof AS_QualByDepth));

if ( includeNonVariants ) {
// Save INFO header names that require alt alleles
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,16 @@ public final class RMSMappingQuality extends InfoFieldAnnotation implements Stan
public boolean allowOlderRawKeyValues = false;

@Override
public String getRawKeyName() { return GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY;} //new key for the two-value MQ data to prevent version mismatch catastrophes
public List<String> getRawKeyNames() { return Arrays.asList(GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY);} //new key for the two-value MQ data to prevent version mismatch catastrophes

public static String getDeprecatedRawKeyName() { return GATKVCFConstants.RAW_RMS_MAPPING_QUALITY_KEY;} //new key for the two-value MQ data to prevent version mismatch catastrophes
public static String getDeprecatedRawKeyName() { return GATKVCFConstants.RAW_RMS_MAPPING_QUALITY_KEY;} //old key that used the janky depth estimation method

@Override
public List<String> getKeyNames() {
return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY, getRawKeyName());
final List<String> allKeys = new ArrayList<>();
allKeys.add(VCFConstants.RMS_MAPPING_QUALITY_KEY);
allKeys.addAll(getRawKeyNames());
return allKeys;
}

@Override
Expand All @@ -75,7 +78,11 @@ public List<VCFInfoHeaderLine> getDescriptions() {

@Override
public List<VCFInfoHeaderLine> getRawDescriptions() {
return Arrays.asList(GATKVCFHeaderLines.getInfoLine(getRawKeyName()));
final List<VCFInfoHeaderLine> lines = new ArrayList<>(1);
for (final String rawKey : getRawKeyNames()) {
lines.add(GATKVCFHeaderLines.getInfoLine(rawKey));
}
return lines;
}

/**
Expand All @@ -94,7 +101,7 @@ public Map<String, Object> annotateRawData(final ReferenceContext ref,
final ReducibleAnnotationData<List<Long>> myData = new ReducibleAnnotationData<>(null);
calculateRawData(vc, likelihoods, myData);
final String annotationString = makeRawAnnotationString(vc.getAlleles(), myData.getAttributeMap());
annotations.put(getRawKeyName(), annotationString);
annotations.put(getRawKeyNames().get(0), annotationString);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps you should write a method that searches the raw key names for the index of the key you want instead of assuming it's the first item. At the very least, declare a named constant for 0 and add some comments explaining your assumptions.

Or, here's another thought: if you can only ever have either 1 or 2 raw keys in this brave new world, maybe you should implement named methods for each of the two keys -- something like getPrimaryRawKeyName() and getSecondaryRawKeyName() (or some more meaningful method names), instead of relying on indices. When you want them both together you can still call getRawKeyNames().

return annotations;
}

Expand All @@ -110,7 +117,7 @@ public Map<String, Object> combineRawData(final List<Allele> vcAlleles, final Li
}
final Map<String, Object> annotations = new HashMap<>();
String annotationString = makeRawAnnotationString(vcAlleles, combinedData.getAttributeMap());
annotations.put(getRawKeyName(), annotationString);
annotations.put(getRawKeyNames().get(0), annotationString);
return annotations;
}

Expand All @@ -122,13 +129,13 @@ private String makeRawAnnotationString(final List<Allele> vcAlleles, final Map<A
@SuppressWarnings({"unchecked", "rawtypes"})//FIXME generics here blow up
public Map<String, Object> finalizeRawData(final VariantContext vc, final VariantContext originalVC) {
String rawMQdata;
if (vc.hasAttribute(getRawKeyName())) {
rawMQdata = vc.getAttributeAsString(getRawKeyName(), null);
if (vc.hasAttribute(getRawKeyNames().get(0))) {
rawMQdata = vc.getAttributeAsString(getRawKeyNames().get(0), null);
}
else if (vc.hasAttribute(getDeprecatedRawKeyName())) {
if (!allowOlderRawKeyValues) {
throw new UserException.BadInput("Presence of '-"+getDeprecatedRawKeyName()+"' annotation is detected. This GATK version expects key "
+ getRawKeyName() + " with a tuple of sum of squared MQ values and total reads over variant "
+ getRawKeyNames().get(0) + " with a tuple of sum of squared MQ values and total reads over variant "
+ "genotypes as the value. This could indicate that the provided input was produced with an older version of GATK. " +
"Use the argument '--"+RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT+"' to override and attempt the deprecated MQ calculation. There " +
"may be differences in how newer GATK versions calculate DP and MQ that may result in worse MQ results. Use at your own risk.");
Expand All @@ -148,7 +155,7 @@ else if (vc.hasAttribute(getDeprecatedRawKeyName())) {
}
else {
logger.warn("MQ annotation data is not properly formatted. This GATK version expects key "
+ getRawKeyName() + " with a tuple of sum of squared MQ values and total reads over variant "
+ getRawKeyNames().get(0) + " with a tuple of sum of squared MQ values and total reads over variant "
+ "genotypes as the value. Attempting to use deprecated MQ calculation.");
final long numOfReads = getNumOfReads(vc, null);
rawMQdata = Math.round(Double.parseDouble(rawMQdata)) + "," + numOfReads; //deprecated format was double so it needs to be converted to long
Expand Down Expand Up @@ -237,7 +244,7 @@ static String formattedValue(double rms) {
* otherwise return the original vc
*/
public VariantContext finalizeRawMQ(final VariantContext vc) {
final String rawMQdata = vc.getAttributeAsString(getRawKeyName(), null);
final String rawMQdata = vc.getAttributeAsString(getRawKeyNames().get(0), null);
if (rawMQdata == null) {
if (!vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
return vc;
Expand All @@ -250,7 +257,7 @@ public VariantContext finalizeRawMQ(final VariantContext vc) {
final String finalizedRMSMAppingQuality = formattedValue(rms);
return new VariantContextBuilder(vc)
.rmAttribute(getDeprecatedRawKeyName()) //some old GVCFs that were reblocked for gnomAD have both
.rmAttribute(getRawKeyName())
.rmAttributes(getRawKeyNames())
.attribute(getKeyNames().get(0), finalizedRMSMAppingQuality)
.make();
}
Expand All @@ -261,7 +268,7 @@ public VariantContext finalizeRawMQ(final VariantContext vc) {
final String finalizedRMSMAppingQuality = formattedValue(rms);
return new VariantContextBuilder(vc)
.rmAttribute(getDeprecatedRawKeyName()) //some old GVCFs that were reblocked for gnomAD have both
.rmAttribute(getRawKeyName())
.rmAttributes(getRawKeyNames())
.attribute(getKeyNames().get(0), finalizedRMSMAppingQuality)
.make();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.util.*;
Expand Down Expand Up @@ -75,7 +76,8 @@ public VariantAnnotatorEngine(final Collection<Annotation> annotationList,
keepRawCombinedAnnotations = keepCombined;
for (InfoFieldAnnotation annot : infoAnnotations) {
if (annot instanceof ReducibleAnnotation) {
reducibleKeys.add(((ReducibleAnnotation) annot).getRawKeyName());
for (final String rawKey : ((ReducibleAnnotation) annot).getRawKeyNames())
reducibleKeys.add(rawKey);
ldgauthier marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
Expand Down Expand Up @@ -198,12 +200,16 @@ public Map<String, Object> combineAnnotations(final List<Allele> allelesList, Ma
for (final InfoFieldAnnotation annotationType : infoAnnotations) {
if (annotationType instanceof ReducibleAnnotation) {
ReducibleAnnotation currentASannotation = (ReducibleAnnotation) annotationType;
if (annotationMap.containsKey(currentASannotation.getRawKeyName())) {
final List<ReducibleAnnotationData<?>> annotationValue = (List<ReducibleAnnotationData<?>>) annotationMap.get(currentASannotation.getRawKeyName());
final Map<String, Object> annotationsFromCurrentType = currentASannotation.combineRawData(allelesList, annotationValue);
combinedAnnotations.putAll(annotationsFromCurrentType);
//remove the combined annotations so that the next method only processes the non-reducible ones
annotationMap.remove(currentASannotation.getRawKeyName());
for (final String rawKey : currentASannotation.getRawKeyNames()) {
if (annotationMap.containsKey(rawKey)) {
final List<ReducibleAnnotationData<?>> annotationValue = (List<ReducibleAnnotationData<?>>) annotationMap.get(currentASannotation.getRawKeyNames().get(0));
final Map<String, Object> annotationsFromCurrentType = currentASannotation.combineRawData(allelesList, annotationValue);
combinedAnnotations.putAll(annotationsFromCurrentType);
//remove all the raw keys for the annotation because we already used all of them in combineRawData
for (final String rk : currentASannotation.getRawKeyNames()) {
annotationMap.remove(rk);
ldgauthier marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
}
}
Expand All @@ -230,11 +236,14 @@ public VariantContext finalizeAnnotations(VariantContext vc, VariantContext orig
variantAnnotations.putAll(annotationsFromCurrentType);
}
//clean up raw annotation data after annotations are finalized
if (!keepRawCombinedAnnotations) {
variantAnnotations.remove(currentASannotation.getRawKeyName());
for (final String rawKey: currentASannotation.getRawKeyNames()) {
if (!keepRawCombinedAnnotations) {
variantAnnotations.remove(rawKey);
ldgauthier marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
}
variantAnnotations.remove(GATKVCFConstants.AS_QUAL_KEY); //this is manual because the "rawKeys" get added by genotyping

// generate a new annotated VC
final VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(variantAnnotations);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import org.broadinstitute.hellbender.tools.walkers.annotator.BaseQualityRankSumTest;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.util.Arrays;
Expand Down Expand Up @@ -43,7 +42,7 @@ public List<String> getKeyNames() {
}

@Override
public String getRawKeyName() { return GATKVCFConstants.AS_RAW_BASE_QUAL_RANK_SUM_KEY;}
public List<String> getRawKeyNames() { return Arrays.asList(GATKVCFConstants.AS_RAW_BASE_QUAL_RANK_SUM_KEY);}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The indexing scheme you added in the most recent revision is an improvement, but it's a bit brittle, since getRawKeyNames() doesn't directly call into getPrimaryRawKeyIndex() when constructing its list. In theory, someone could modify one method but not the other...

I think you should either:

  1. Come up with a scheme where getRawKeyNames() can call directly into the method(s) defining key order
    (best solution, but a little work)

or:

  1. Write a unit test for every single getRawKeyNames() method that asserts that you get the expected keys in the expected order, and also for the getPrimaryRawKeyIndex() methods asserting that you always get 0.


/**
* Get the element for the given read at the given reference position
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class AS_MappingQualityRankSumTest extends AS_RankSumTest implements AS_S
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.AS_MAP_QUAL_RANK_SUM_KEY); }

@Override
public String getRawKeyName() { return GATKVCFConstants.AS_RAW_MAP_QUAL_RANK_SUM_KEY;}
public List<String> getRawKeyNames() { return Arrays.asList(GATKVCFConstants.AS_RAW_MAP_QUAL_RANK_SUM_KEY);}

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public class AS_QualByDepth extends InfoFieldAnnotation implements ReducibleAnno
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.AS_QUAL_BY_DEPTH_KEY); }

@Override
public String getRawKeyName() { return GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY;}
public List<String> getRawKeyNames() { return Arrays.asList(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY, GATKVCFConstants.AS_QUAL_KEY);}

@Override
public List<VCFInfoHeaderLine> getRawDescriptions() {
Expand Down Expand Up @@ -144,8 +144,10 @@ public Map<String, Object> finalizeRawData(VariantContext vc, VariantContext ori

final Map<String, Object> map = new HashMap<>();
map.put(getKeyNames().get(0), AnnotationUtils.encodeValueList(QDlist, "%.2f"));
//keep AS_QUALapprox for Gnarly Pipeline because we don't subset alts or output genotypes if there are more than 6 alts
map.put(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY, StringUtils.join(alleleQualList, AnnotationUtils.LIST_DELIMITER));
if (vc.hasAttribute(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY)) {
//keep AS_QUALapprox for Gnarly Pipeline because we don't subset alts or output genotypes if there are more than 6 alts
map.put(GATKVCFConstants.AS_RAW_QUAL_APPROX_KEY, StringUtils.join(alleleQualList, AnnotationUtils.LIST_DELIMITER));
}
return map;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public Map<String, Object> annotateRawData(final ReferenceContext ref,
final ReducibleAnnotationData<Double> myData = new ReducibleAnnotationData<>(null);
getRMSDataFromLikelihoods(likelihoods, myData);
final String annotationString = makeRawAnnotationString(vc.getAlleles(), myData.getAttributeMap());
annotations.put(getRawKeyName(), annotationString);
annotations.put(getRawKeyNames().get(0), annotationString);
return annotations;
}

Expand Down Expand Up @@ -119,7 +119,7 @@ public Map<String, Object> combineRawData(final List<Allele> vcAlleles, final Li
}
final Map<String, Object> annotations = new HashMap<>();
String annotationString = makeRawAnnotationString(vcAlleles, combinedData.getAttributeMap());
annotations.put(getRawKeyName(), annotationString);
annotations.put(getRawKeyNames().get(0), annotationString);
return annotations;
}

Expand Down Expand Up @@ -157,10 +157,10 @@ protected void parseRawDataString(final ReducibleAnnotationData<Double> myData)
@Override
@SuppressWarnings({"unchecked", "rawtypes"})//FIXME generics here blow up
public Map<String, Object> finalizeRawData(final VariantContext vc, final VariantContext originalVC) {
if (!vc.hasAttribute(getRawKeyName())) {
if (!vc.hasAttribute(getRawKeyNames().get(0))) {
return new HashMap<>();
}
final String rawMQdata = vc.getAttributeAsString(getRawKeyName(),null);
final String rawMQdata = vc.getAttributeAsString(getRawKeyNames().get(0),null);
if (rawMQdata == null) {
return new HashMap<>();
}
Expand All @@ -179,7 +179,7 @@ public Map<String, Object> finalizeRawData(final VariantContext vc, final Varian
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.AS_RMS_MAPPING_QUALITY_KEY); }

@Override
public String getRawKeyName() { return GATKVCFConstants.AS_RAW_RMS_MAPPING_QUALITY_KEY; }
public List<String> getRawKeyNames() { return Arrays.asList(GATKVCFConstants.AS_RAW_RMS_MAPPING_QUALITY_KEY); }

private void getRMSDataFromLikelihoods(final AlleleLikelihoods<GATKRead, Allele> likelihoods, ReducibleAnnotationData<Double> myData) {
for ( final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele : likelihoods.bestAllelesBreakingTies() ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public Map<String, Object> annotateRawData(final ReferenceContext ref,
if (annotationString == null){
return Collections.emptyMap();
}
return Collections.singletonMap(getRawKeyName(), annotationString);
return Collections.singletonMap(getRawKeyNames().get(0), annotationString);
}

/**
Expand Down Expand Up @@ -178,10 +178,10 @@ private void calculateRawData(VariantContext vc, final AlleleLikelihoods<GATKRea
* @return
*/
public Map<String, Object> finalizeRawData(final VariantContext vc, final VariantContext originalVC) {
if (!vc.hasAttribute(getRawKeyName())) {
if (!vc.hasAttribute(getRawKeyNames().get(0))) {
return new HashMap<>();
}
final String rawRankSumData = vc.getAttributeAsString(getRawKeyName(),null);
final String rawRankSumData = vc.getAttributeAsString(getRawKeyNames().get(0),null);
if (rawRankSumData == null) {
return new HashMap<>();
}
Expand Down Expand Up @@ -230,7 +230,7 @@ public Map<String, Object> combineRawData(final List<Allele> vcAlleles, final Li

}
final String annotationString = makeCombinedAnnotationString(vcAlleles, combinedData.getAttributeMap());
return Collections.singletonMap(getRawKeyName(), annotationString);
return Collections.singletonMap(getRawKeyNames().get(0), annotationString);
}

// Parses the raw data string into a Histogram and sets the inputs attribute map accordingly
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
import org.broadinstitute.hellbender.tools.walkers.annotator.ReadPosRankSumTest;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.util.Arrays;
Expand Down Expand Up @@ -48,7 +45,7 @@ public class AS_ReadPosRankSumTest extends AS_RankSumTest implements AS_Standard
public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.AS_READ_POS_RANK_SUM_KEY); }

@Override
public String getRawKeyName() { return GATKVCFConstants.AS_RAW_READ_POS_RANK_SUM_KEY;}
public List<String> getRawKeyNames() { return Arrays.asList(GATKVCFConstants.AS_RAW_READ_POS_RANK_SUM_KEY);}

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
Expand Down
Loading