-
Notifications
You must be signed in to change notification settings - Fork 589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
The rest of the mitochondria joint calling pipeline #5673
Changes from 2 commits
5cdebaa
9e33107
19d3261
a566ac8
09c9488
1a56b6e
b364b5c
ca3a2f3
84abe1e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,10 +5,7 @@ | |
import htsjdk.samtools.util.Locatable; | ||
import htsjdk.variant.variantcontext.*; | ||
import htsjdk.variant.variantcontext.writer.VariantContextWriter; | ||
import htsjdk.variant.vcf.VCFConstants; | ||
import htsjdk.variant.vcf.VCFHeader; | ||
import htsjdk.variant.vcf.VCFHeaderLine; | ||
import htsjdk.variant.vcf.VCFStandardHeaderLines; | ||
import htsjdk.variant.vcf.*; | ||
import org.broadinstitute.barclay.argparser.Argument; | ||
import org.broadinstitute.barclay.argparser.ArgumentCollection; | ||
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; | ||
|
@@ -25,6 +22,7 @@ | |
import org.broadinstitute.hellbender.utils.IntervalUtils; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; | ||
|
||
import java.io.File; | ||
|
@@ -80,6 +78,9 @@ public final class CombineGVCFs extends MultiVariantWalkerGroupedOnStart { | |
|
||
public static final String BP_RES_LONG_NAME = "convert-to-base-pair-resolution"; | ||
public static final String BREAK_BANDS_LONG_NAME = "break-bands-at-multiples-of"; | ||
public static final String USE_SOMATIC_LONG_NAME = "input-is-somatic"; | ||
public static final String DROP_SOMATIC_FILTERING_ANNOTATIONS_LONG_NAME = "drop-somatic-filtering-annotations"; | ||
public static final String ALLELE_FRACTION_DELTA_LONG_NAME = "allele-fraction-error"; | ||
|
||
@Argument(fullName= StandardArgumentDefinitions.OUTPUT_LONG_NAME, | ||
shortName=StandardArgumentDefinitions.OUTPUT_SHORT_NAME, | ||
|
@@ -99,6 +100,20 @@ public final class CombineGVCFs extends MultiVariantWalkerGroupedOnStart { | |
@Argument(fullName=BREAK_BANDS_LONG_NAME, doc = "If > 0, reference bands will be broken up at genomic positions that are multiples of this number", optional=true) | ||
protected int multipleAtWhichToBreakBands = 0; | ||
|
||
/** | ||
* Merge somatic GVCFs, retaining LOD and haplotype event count information in FORMAT field | ||
* Note that the Mutect2 reference confidence mode is in BETA -- the likelihoods model and output format are subject to change in subsequent versions. | ||
*/ | ||
@Argument(fullName=USE_SOMATIC_LONG_NAME, doc = "Merge input GVCFs according to somatic (i.e. Mutect2) annotations (BETA)") | ||
protected boolean somaticInput = false; | ||
|
||
/** | ||
* Rather than move the per-sample INFO annotations used for filtering to the FORMAT field, drop them entirely. | ||
* This makes the FORMAT field more readable and reduces file sizes for large cohorts. | ||
*/ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the current plan is to filter at the single sample level and then merge with no callset level filters, is that right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There will be callset-level filters. If any sample has a PASS variant the site will be PASS. If all variants are filtered the site will get all the filters. The original filters will go to the genotype filter (GF) field. |
||
@Argument(fullName=DROP_SOMATIC_FILTERING_ANNOTATIONS_LONG_NAME, doc = "For input somatic GVCFs (i.e. from Mutect2) drop filtering annotations") | ||
protected boolean dropSomaticFilteringAnnotations = false; | ||
|
||
@Override | ||
public boolean useVariantAnnotations() { return true;} | ||
|
||
|
@@ -233,12 +248,16 @@ private void resizeReferenceIfNeeded(SimpleInterval intervalToClose) { | |
|
||
@Override | ||
public void onTraversalStart() { | ||
if (somaticInput) { | ||
logger.warn("Note that the Mutect2 reference confidence mode is in BETA -- the likelihoods model and output format are subject to change in subsequent versions."); | ||
} | ||
|
||
// create the annotation engine | ||
annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, Collections.emptyList(), false); | ||
|
||
vcfWriter = getVCFWriter(); | ||
|
||
referenceConfidenceVariantContextMerger = new ReferenceConfidenceVariantContextMerger(annotationEngine, getHeaderForVariants()); | ||
referenceConfidenceVariantContextMerger = new ReferenceConfidenceVariantContextMerger(annotationEngine, getHeaderForVariants(), somaticInput, dropSomaticFilteringAnnotations); | ||
|
||
//now that we have all the VCF headers, initialize the annotations (this is particularly important to turn off RankSumTest dithering in integration tests)' | ||
sequenceDictionary = getBestAvailableSequenceDictionary(); | ||
|
@@ -266,6 +285,23 @@ private VariantContextWriter getVCFWriter() { | |
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY); | ||
} | ||
|
||
if (somaticInput) { | ||
//single-sample M2 variant filter status will get moved to genotype filter | ||
headerLines.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY)); | ||
|
||
if (!dropSomaticFilteringAnnotations) { | ||
//standard M2 INFO annotations for filtering will get moved to FORMAT field | ||
//TODO: I don't have a great solution for using the same descriptions other than copying | ||
headerLines.add(new VCFFormatHeaderLine(GATKVCFConstants.MEDIAN_BASE_QUALITY_KEY, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "median base quality")); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For all of the
To that you could add a method
And then invoke as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the annotations that are not of that type but are in
You could invoke as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This solutions get around the issue of having two independent definitions of the corresponding INFO and FORMAT lines that could get out of sync. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like the getEquivalentFormatHeaderLine idea, and since that takes a string, which I already have in a list (which I moved to Mutect2FilteringEngine, but am open to suggestions) I think I have a nice elegant solution. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not entirely elegant because I still had to add your header lines to the master list, but not too bad. Much better than the reflection rabbit hole I started down. |
||
headerLines.add(new VCFFormatHeaderLine(GATKVCFConstants.MEDIAN_MAPPING_QUALITY_KEY, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "median mapping quality")); | ||
headerLines.add(new VCFFormatHeaderLine(GATKVCFConstants.MEDIAN_READ_POSITON_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "median distance from end of read")); | ||
headerLines.add(new VCFFormatHeaderLine(GATKVCFConstants.MEDIAN_FRAGMENT_LENGTH_KEY, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "median fragment length")); | ||
headerLines.add(new VCFFormatHeaderLine(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY, 1, VCFHeaderLineType.Integer, "Number of events in this haplotype")); | ||
headerLines.add(new VCFFormatHeaderLine(GATKVCFConstants.STRAND_ARTIFACT_AF_KEY, 3, VCFHeaderLineType.Float, "MAP estimates of allele fraction given z")); | ||
headerLines.add(new VCFFormatHeaderLine(GATKVCFConstants.STRAND_ARTIFACT_POSTERIOR_KEY, 3, VCFHeaderLineType.Float, "posterior probabilities of the presence of strand artifact")); | ||
} | ||
} | ||
|
||
VariantContextWriter writer = createVCFWriter(outputFile); | ||
|
||
final Set<String> sampleNameSet = new IndexedSampleList(samples).asSetOfSamples(); | ||
|
@@ -406,12 +442,11 @@ private VariantContext referenceBlockMerge(final List<VariantContext> vcs, final | |
|
||
// genotypes | ||
final GenotypesContext genotypes = GenotypesContext.create(); | ||
for ( final VariantContext vc : vcs ) { | ||
for ( final Genotype g : vc.getGenotypes() ) { | ||
for (final VariantContext vc : vcs) { | ||
for (final Genotype g : vc.getGenotypes()) { | ||
genotypes.add(new GenotypeBuilder(g).alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy())).make()); | ||
} | ||
} | ||
|
||
return new VariantContextBuilder("", first.getContig(), start, end, Arrays.asList(refAllele, Allele.NON_REF_ALLELE)).attributes(attrs).genotypes(genotypes).make(); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,18 +15,26 @@ | |
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RMSMappingQuality; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.*; | ||
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.GeneralPloidyFailOverAFCalculatorProvider; | ||
import org.broadinstitute.hellbender.tools.walkers.mutect.FilterMutectCalls; | ||
import org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection; | ||
import org.broadinstitute.hellbender.tools.walkers.mutect.M2FiltersArgumentCollection; | ||
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
import org.broadinstitute.hellbender.utils.Utils; | ||
import org.broadinstitute.hellbender.utils.genotyper.IndexedSampleList; | ||
import org.broadinstitute.hellbender.utils.genotyper.SampleList; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; | ||
import org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants; | ||
|
||
import java.io.File; | ||
import java.util.*; | ||
import java.util.stream.Collectors; | ||
|
||
import static org.broadinstitute.hellbender.tools.walkers.CombineGVCFs.ALLELE_FRACTION_DELTA_LONG_NAME; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
import static org.broadinstitute.hellbender.tools.walkers.CombineGVCFs.USE_SOMATIC_LONG_NAME; | ||
|
||
/** | ||
* Perform joint genotyping on one or more samples pre-called with HaplotypeCaller | ||
* | ||
|
@@ -92,14 +100,38 @@ public final class GenotypeGVCFs extends VariantLocusWalker { | |
public static final String ALL_SITES_LONG_NAME = "include-non-variant-sites"; | ||
public static final String ALL_SITES_SHORT_NAME = "all-sites"; | ||
private static final String GVCF_BLOCK = "GVCFBlock"; | ||
private VCFHeader outputHeader; | ||
|
||
|
||
@Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, | ||
doc="File to which variants should be written", optional=false) | ||
private File outputFile; | ||
|
||
@Argument(fullName=ALL_SITES_LONG_NAME, shortName="all-sites", doc="Include loci found to be non-variant after genotyping", optional=true) | ||
@Argument(fullName=ALL_SITES_LONG_NAME, shortName=ALL_SITES_SHORT_NAME, doc="Include loci found to be non-variant after genotyping", optional=true) | ||
private boolean includeNonVariants = false; | ||
|
||
/** | ||
* "Genotype" somatic GVCFs, outputting genotypes according to confidently called alt alleles, which may lead to inconsistent ploidy | ||
* Note that the Mutect2 reference confidence mode is in BETA -- the likelihoods model and output format are subject to change in subsequent versions. | ||
*/ | ||
@Argument(fullName=USE_SOMATIC_LONG_NAME, doc = "Finalize input GVCF according to somatic (i.e. Mutect2) TLODs (BETA feature)") | ||
protected boolean somaticInput = false; | ||
|
||
/** | ||
* Only variants with tumor LODs exceeding this threshold will be written to the VCF, regardless of filter status. | ||
* Set to less than or equal to tumor_lod. Increase argument value to reduce false positives in the callset. | ||
*/ | ||
@Argument(fullName=M2ArgumentCollection.EMISSION_LOD_LONG_NAME, shortName = M2ArgumentCollection.EMISSION_LOG_SHORT_NAME, | ||
doc = "LOD threshold to emit variant to VCF.") | ||
protected double tlodThreshold = 0.8 * M2FiltersArgumentCollection.DEFAULT_TLOD_FILTER_THRESHOLD; //allow for some lower quality variants | ||
|
||
|
||
/** | ||
* Margin of error in allele fraction to consider a somatic variant homoplasmic, i.e. if there is less than a 0.1% reference allele fraction, those reads are likely errors | ||
*/ | ||
@Argument(fullName=ALLELE_FRACTION_DELTA_LONG_NAME, doc = "Margin of error in allele fraction to consider a somatic variant homoplasmic") | ||
protected double afTolerance = 1e-3; //based on Q30 as a "good" base quality score | ||
|
||
@ArgumentCollection | ||
private GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection(); | ||
|
||
|
@@ -152,6 +184,10 @@ public List<Class<? extends Annotation>> getDefaultVariantAnnotationGroups() { | |
|
||
@Override | ||
public void onTraversalStart() { | ||
if (somaticInput) { | ||
logger.warn("Note that the Mutect2 reference confidence mode is in BETA -- the likelihoods model and output format are subject to change in subsequent versions."); | ||
} | ||
|
||
if (!includeNonVariants) { | ||
changeTraversalModeToByVariant(); | ||
} | ||
|
@@ -185,7 +221,7 @@ public void onTraversalStart() { | |
// We only want the engine to generate the AS_QUAL key if we are using AlleleSpecific annotations. | ||
genotypingEngine = new MinimalGenotypingEngine(createUAC(), samples, new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs), annotationEngine.isRequestedReducibleRawKey(GATKVCFConstants.AS_QUAL_KEY)); | ||
|
||
merger = new ReferenceConfidenceVariantContextMerger(annotationEngine, getHeaderForVariants()); | ||
merger = new ReferenceConfidenceVariantContextMerger(annotationEngine, getHeaderForVariants(), somaticInput); | ||
|
||
if ( includeNonVariants ) { | ||
// Save INFO header names that require alt alleles | ||
|
@@ -205,7 +241,7 @@ private static boolean annotationShouldBeSkippedForHomRefSites(VariantAnnotation | |
return annotation instanceof RankSumTest || annotation instanceof RMSMappingQuality || annotation instanceof AS_RMSMappingQuality; | ||
} | ||
|
||
private void setupVCFWriter(VCFHeader inputVCFHeader, SampleList samples) { | ||
private void setupVCFWriter(final VCFHeader inputVCFHeader, final SampleList samples) { | ||
final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputVCFHeader.getMetaDataInInputOrder()); | ||
headerLines.addAll(getDefaultToolVCFHeaderLines()); | ||
|
||
|
@@ -227,8 +263,8 @@ private void setupVCFWriter(VCFHeader inputVCFHeader, SampleList samples) { | |
vcfWriter = createVCFWriter(outputFile); | ||
|
||
final Set<String> sampleNameSet = samples.asSetOfSamples(); | ||
final VCFHeader vcfHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet)); | ||
vcfWriter.writeHeader(vcfHeader); | ||
outputHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you inline this and remove it from the class's members? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope, I need it for some of the per-allele annotation subsetting magic later on. |
||
vcfWriter.writeHeader(outputHeader); | ||
} | ||
|
||
@Override | ||
|
@@ -238,7 +274,12 @@ public void apply(final Locatable loc, List<VariantContext> variants, ReadsConte | |
|
||
ref.setWindow(10, 10); //TODO this matches the gatk3 behavior but may be unnecessary | ||
final VariantContext mergedVC = merger.merge(variantsToProcess, loc, includeNonVariants ? ref.getBase() : null, !includeNonVariants, false); | ||
final VariantContext regenotypedVC = regenotypeVC(mergedVC, ref, features, includeNonVariants); | ||
final VariantContext regenotypedVC; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You probably already knew by now that I would suggest using a ternary here. That doesn't mean you have to accept the suggestion, though. |
||
if (somaticInput) { | ||
regenotypedVC = regenotypeSomaticVC(mergedVC, ref, features, includeNonVariants); | ||
} else { | ||
regenotypedVC = regenotypeVC(mergedVC, ref, features, includeNonVariants); | ||
} | ||
if (regenotypedVC != null) { | ||
final SimpleInterval variantStart = new SimpleInterval(regenotypedVC.getContig(), regenotypedVC.getStart(), regenotypedVC.getStart()); | ||
if (!GATKVariantContextUtils.isSpanningDeletionOnly(regenotypedVC) && | ||
|
@@ -363,6 +404,107 @@ private VariantContext removeNonRefAlleles(final VariantContext vc) { | |
} | ||
} | ||
|
||
/** | ||
* Re-genotype (and re-annotate) a combined genomic VC | ||
* @return a new VariantContext or null if the site turned monomorphic and we don't want such sites | ||
*/ | ||
private VariantContext regenotypeSomaticVC(final VariantContext originalVC, final ReferenceContext ref, final FeatureContext features, boolean includeNonVariants) { | ||
Utils.nonNull(originalVC); | ||
|
||
final VariantContext result; | ||
if ( originalVC.isVariant() && originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY,0) > 0 ) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My vote is to replace this all with
which might strike you as ghastly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope, nested ternaries violate my personal style code. |
||
result = callSomaticGenotypes(originalVC); | ||
} else if (includeNonVariants) { | ||
result = originalVC; | ||
} | ||
else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move this last |
||
result = null; | ||
} | ||
return result; | ||
} | ||
|
||
|
||
/** | ||
* Drop low quality alleles and call genotypes | ||
* CombineGVCFs will convert calls to no-call (of varying ploidy, as is the case in somatic) | ||
* | ||
* @param vc input VariantContext with no-called genotypes | ||
* @return a VC with called genotypes and low quality alleles removed, may be null | ||
*/ | ||
private VariantContext callSomaticGenotypes(final VariantContext vc) { | ||
final VariantContextBuilder builder = new VariantContextBuilder(vc); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can wait to be declared until after the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, I see it used far below, but that distance is so great I would try to make the uses more local, maybe by using two different builders. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd rather stick with one builder since this method is going to get called for every single variant and I don't want to waste time setting up another builder. Unless it's not as slow as I think it is? |
||
GenotypeBuilder gb; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This doesn't need to be defined outside the |
||
final List<Genotype> newGenotypes = new ArrayList<>(); | ||
final GenotypesContext genotypes = vc.getGenotypes(); | ||
final double[] perAlleleLikelihoodSums = new double[vc.getAlleles().size()]; //needs the ref for the subsetting utils | ||
|
||
for(final Genotype g : genotypes) { | ||
gb = new GenotypeBuilder(g); | ||
final double[] tlodArray = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(g, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, 0.0); | ||
final double[] variantAFArray = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> null, 0.0); | ||
double variantAFtotal = 0; | ||
final List<Allele> calledAlleles = new ArrayList<>(); | ||
for(int i = 0; i < vc.getAlleles().size()-1; i++) { | ||
variantAFtotal += variantAFArray[i]; | ||
if (tlodArray[i] > tlodThreshold) { | ||
calledAlleles.add(vc.getAlternateAllele(i)); | ||
perAlleleLikelihoodSums[i+1] += tlodArray[i]; | ||
} | ||
} | ||
//hack for weird Mutect2 ploidy -- if the variant is non-homoplasmic, call the reference allele too | ||
if(variantAFtotal < 1-afTolerance && (g.hasAD() ? g.getAD()[0] > 0 : true)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm actually going to vote against the ternary in favor of |
||
calledAlleles.add(0, vc.getReference()); | ||
} | ||
//"ploidy" gets set according to the size of the alleles List in the Genotype | ||
gb.alleles(calledAlleles); | ||
newGenotypes.add(gb.make()); | ||
} | ||
|
||
final VariantContext regenotypedVC = builder.genotypes(newGenotypes).make(); | ||
|
||
final int maxAltAlleles = ((UnifiedArgumentCollection)genotypingEngine.getConfiguration()).genotypeArgs.MAX_ALTERNATE_ALLELES; | ||
List<Allele> allelesToKeep; | ||
|
||
//we need to make sure all alleles pass the tlodThreshold | ||
allelesToKeep = new ArrayList<>(perAlleleLikelihoodSums.length-1); | ||
allelesToKeep.add(vc.getReference()); | ||
for (int i = 1; i < perAlleleLikelihoodSums.length; i++) { | ||
if (perAlleleLikelihoodSums[i] > tlodThreshold) { | ||
allelesToKeep.add(vc.getAlternateAllele(i-1)); | ||
} | ||
} | ||
|
||
if (regenotypedVC.getAlternateAlleles().size() > maxAltAlleles) { | ||
allelesToKeep = AlleleSubsettingUtils.filterToMaxNumberOfAltAllelesBasedOnScores(maxAltAlleles, allelesToKeep, perAlleleLikelihoodSums); | ||
} | ||
|
||
if (allelesToKeep.size() == 1) { | ||
return null; | ||
} | ||
|
||
//if we didn't drop alleles then we're done! | ||
if (allelesToKeep.size() == regenotypedVC.getAlleles().size()) { | ||
return regenotypedVC; | ||
} | ||
|
||
final int[] relevantIndices = new int[allelesToKeep.size()]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
for (int i = 0; i < allelesToKeep.size(); i++) { | ||
relevantIndices[i] = regenotypedVC.getAlleles().indexOf(allelesToKeep.get(i)); | ||
} | ||
|
||
//do another pass over genotypes to drop the alleles that aren't called | ||
final GenotypesContext reducedGenotypes = AlleleSubsettingUtils.subsetSomaticAlleles(outputHeader, regenotypedVC.getGenotypes(), allelesToKeep, relevantIndices); | ||
final VariantContext subsetVC = builder.alleles(allelesToKeep).genotypes(reducedGenotypes).make(); | ||
final VariantContext trimmedVC = GATKVariantContextUtils.trimAlleles(subsetVC, true, true); | ||
if (isProperlyPolymorphic(trimmedVC)) { | ||
return trimmedVC; | ||
} | ||
else { | ||
return null; | ||
} | ||
} | ||
|
||
|
||
/** | ||
* Determines whether the provided VariantContext has real alternate alleles. | ||
* | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SOMATIC_INPUT
seems better thanUSE_SOMATIC
, IMO.