Skip to content

Commit

Permalink
Changed how symbollic alleles are handled for GencodeFuncotation crea…
Browse files Browse the repository at this point in the history
…tion. (#5834)

* Added `#` as a character to be sanitized by `VCFOutputRenderer`. (#5817)
* Added in code to change how the best transcript is determined.
* Changed how symbolic alleles are handled for `GencodeFuncotation` creation.

Fixes #5671
  • Loading branch information
jonn-smith authored Mar 26, 2019
1 parent c3a1139 commit 6beef4f
Show file tree
Hide file tree
Showing 14 changed files with 383 additions and 52 deletions.
2 changes: 1 addition & 1 deletion scripts/funcotator/testing/testFuncotator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ if [[ $r -eq 0 ]] && ${doRunLargeTests} ; then
REF=$HG19
else
INPUT=/Users/jonn/Development/FUNCOTATOR_LARGE_TEST_INPUTS/hg38_trio.vcf
#INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSetHG38.vcf
#INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSetHG38.vcf
#INPUT=/Users/jonn/Development/tmp/cohort24_23_seg.subset.vcf
#INPUT=/Users/jonn/Development/gatk/tmp.38.vcf
REF=$HG38
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,29 +248,46 @@ private boolean isFeatureListCompatible(final List<Feature> featureList) {
* @return Features from our FeatureInput {@link #mainSourceFileAsFeatureInput} queried from the FeatureContext
*/
@SuppressWarnings("unchecked")
protected List<Feature> queryFeaturesFromFeatureContext(final FeatureContext featureContext) {
private List<Feature> queryFeaturesFromFeatureContext(final FeatureContext featureContext) {
final List<Feature> features;

SimpleInterval queryInterval = featureContext.getInterval();

// Do we need to do a fuzzy hg19 / b37 conversion for querying our features:
if ( dataSourceIsB37 ) {
// Create a B37 interval:
final SimpleInterval b37Interval =
new SimpleInterval(
FuncotatorUtils.convertHG19ContigToB37Contig(featureContext.getInterval().getContig()),
featureContext.getInterval().getStart(),
featureContext.getInterval().getEnd()
);
// Get the features:
features = (List<Feature>) featureContext.getValues(mainSourceFileAsFeatureInput, b37Interval);
queryInterval = new SimpleInterval(
FuncotatorUtils.convertHG19ContigToB37Contig(queryInterval.getContig()),
queryInterval.getStart(),
queryInterval.getEnd()
);
}
// Query as normal:
else {

// Perform extra transformations on the query interval:
queryInterval = transformFeatureQueryInterval(queryInterval);

// If the interval has not changed, we should use the original one:
if ( queryInterval.equals(featureContext.getInterval() ) ) { // Get the features:
features = (List<Feature>) featureContext.getValues(mainSourceFileAsFeatureInput);
}
else {
// Query as normal:
features = (List<Feature>) featureContext.getValues(mainSourceFileAsFeatureInput, queryInterval);
}

return features;
}

/**
* A Method to allow {@link DataSourceFuncotationFactory} objects to adjust the query interval further for their
* own needs (e.g. for flanking).
* @param queryInterval The baseline {@link SimpleInterval} to be modified.
* @return A {@link SimpleInterval} that has been modified for this {@link DataSourceFuncotationFactory}'s specific needs.
*/
protected SimpleInterval transformFeatureQueryInterval(final SimpleInterval queryInterval) {
return queryInterval;
}

/**
* Creates a {@link List} of {@link Funcotation} for the given {@code variant} and {@code referenceContext}.
* These will be default funcotations that essentially have empty values.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.ReferenceDataSource;
Expand Down Expand Up @@ -421,7 +420,7 @@ private List<GencodeFuncotation> createGencodeFuncotationsByAllTranscripts( fina
}

// Grab the best choice in the case of transcript selection modes other than ALL. The selection will be the first
// transcript in the list.
// transcript in the list.
if ((this.transcriptSelectionMode != TranscriptSelectionMode.ALL) && (gencodeFuncotationList.size() > 0)) {
return Collections.singletonList(gencodeFuncotationList.get(0));
}
Expand Down Expand Up @@ -502,21 +501,20 @@ public FuncotatorArgumentDefinitions.DataSourceType getType() {
}

/**
* {@inheritDoc}
*
* We override this method to request extra padding around queries on our FeatureContext to take into account
* the 5' and 3' flank sizes in our {@link #flankSettings}. We use the max of these two values as our
* query padding.
*
* @param featureContext the FeatureContext to query
* @return Features from our FeatureInput {@link #mainSourceFileAsFeatureInput} queried from the FeatureContext
*/
@Override
protected List<Feature> queryFeaturesFromFeatureContext( FeatureContext featureContext ) {
protected SimpleInterval transformFeatureQueryInterval(final SimpleInterval queryInterval) {
final int queryPadding = Math.max(flankSettings.fivePrimeFlankSize, flankSettings.threePrimeFlankSize);

@SuppressWarnings("unchecked")
final List<Feature> queryResults = (List<Feature>)featureContext.getValues(mainSourceFileAsFeatureInput, queryPadding, queryPadding);
final int newStart = queryInterval.getStart() - queryPadding;
final int newEnd = queryInterval.getEnd() + queryPadding;

return queryResults;
return new SimpleInterval( queryInterval.getContig(), newStart < 1 ? 1 : newStart, newEnd);
}

//==================================================================================================================
Expand Down Expand Up @@ -857,16 +855,6 @@ private GencodeFuncotation createGencodeFuncotationOnSingleTranscript(final Vari
final GencodeGtfTranscriptFeature transcript) {
final GencodeFuncotation gencodeFuncotation;

// If the alt allele is a spanning deletion or a symbolic allele, create an unknown funcotation:
if (altAllele.isSymbolic() || altAllele.equals(Allele.SPAN_DEL) ) {
return createFuncotationForSymbolicAltAllele(variant, altAllele, transcript, reference);
}

// If the reference allele or alternate allele contains any masked bases:
if ( variant.getReference().getBaseString().contains(FuncotatorConstants.MASKED_ANY_BASE_STRING) || altAllele.getBaseString().contains(FuncotatorConstants.MASKED_ANY_BASE_STRING) ) {
return createFuncotationForMaskedBases(variant, altAllele, transcript, reference);
}

// TODO: check for complex INDEL and warn and skip (https://github.com/broadinstitute/gatk/issues/5411).

// Find the sub-feature of transcript that contains our variant:
Expand Down Expand Up @@ -898,6 +886,12 @@ else if ( isThreePrimeFlank(variant, transcript, flankSettings.threePrimeFlankSi
}
}

// Make sure that we have accounted for "special" allele cases:
final GencodeFuncotation specialAlleleFuncotation = checkForAndCreateSpecialAlleleFuncotation(variant, altAllele, reference, transcript);
if ( specialAlleleFuncotation != null ) {
return specialAlleleFuncotation;
}

// Make sure the sub-regions in the transcript actually contain the variant:
// TODO: this is slow, and repeats work that is done later in the process (we call getSortedCdsAndStartStopPositions when creating the sequence comparison)
final int startPosInTranscript = FuncotatorUtils.getStartPositionInTranscript(variant, getSortedCdsAndStartStopPositions(transcript), transcript.getGenomicStrand() );
Expand Down Expand Up @@ -931,6 +925,32 @@ else if ( GencodeGtfTranscriptFeature.class.isAssignableFrom(containingSubfeatur
return gencodeFuncotation;
}

/**
* Checks the given variant alt allele for whether it's a special case.
* If so, will create the appropriate funcotation for that case.
* @param variant {@link VariantContext} for the given {@code altAllele}.
* @param altAllele The alternate {@link Allele} to be checked for special cases.
* @param reference {@link ReferenceContext} supporting the given {@code variant}.
* @param transcript {@link GencodeGtfTranscriptFeature} containing the given {@code variant}.
* @return A {@link GencodeFuncotation} for the given special case alt allele, or {@code null} if the allele is not a special case.
*/
private GencodeFuncotation checkForAndCreateSpecialAlleleFuncotation(final VariantContext variant,
final Allele altAllele,
final ReferenceContext reference,
final GencodeGtfTranscriptFeature transcript) {
// If the alt allele is a spanning deletion or a symbolic allele, create an unknown funcotation:
if (altAllele.isSymbolic() || altAllele.equals(Allele.SPAN_DEL) ) {
return createFuncotationForSymbolicAltAllele(variant, altAllele, transcript, reference);
}

// If the reference allele or alternate allele contains any masked bases:
if ( variant.getReference().getBaseString().contains(FuncotatorConstants.MASKED_ANY_BASE_STRING) || altAllele.getBaseString().contains(FuncotatorConstants.MASKED_ANY_BASE_STRING) ) {
return createFuncotationForMaskedBases(variant, altAllele, transcript, reference);
}

return null;
}

/**
* Create a {@link GencodeFuncotation} for a {@code variant} that occurs in a given {@code exon}.
* @param variant The {@link VariantContext} for which to create a {@link GencodeFuncotation}.
Expand Down Expand Up @@ -2372,6 +2392,12 @@ private GencodeFuncotation createIgrFuncotation(final VariantContext variant,
* @return A flank funcotation for the given allele
*/
private GencodeFuncotation createFlankFuncotation(final VariantContext variant, final Allele altAllele, final GencodeGtfTranscriptFeature transcript, final GencodeGtfGeneFeature gtfFeature, final ReferenceContext reference, final GencodeFuncotation.VariantClassification flankType) {

// Create a symbolic allele funcotation for the flank type, if appropriate:
if (altAllele.isSymbolic() || altAllele.equals(Allele.SPAN_DEL) ) {
return createFuncotationForSymbolicAltAllele(variant, altAllele, transcript, reference, flankType);
}

final GencodeFuncotationBuilder funcotationBuilder = new GencodeFuncotationBuilder();

// NOTE: The reference context is ALWAYS from the + strand
Expand Down Expand Up @@ -2414,12 +2440,31 @@ private GencodeFuncotation createFlankFuncotation(final VariantContext variant,
* @param altSymbolicAllele The alternate symbolic {@link Allele} associated with this annotation.
* @param annotationTranscript The transcript in which this variant occurs.
* @param reference The {@link ReferenceContext} in which the given {@link Allele}s appear.
* @return An IGR funcotation for the given allele.
* @return An appropriate funcotation for the given symbolic allele.
*/
private GencodeFuncotation createFuncotationForSymbolicAltAllele(final VariantContext variant,
final Allele altSymbolicAllele,
final GencodeGtfTranscriptFeature annotationTranscript,
final ReferenceContext reference) {
return createFuncotationForSymbolicAltAllele( variant, altSymbolicAllele, annotationTranscript, reference, GencodeFuncotation.VariantClassification.COULD_NOT_DETERMINE );
}

/**
* Creates a {@link GencodeFuncotation} based on a given symbolic alternate {@link Allele}.
*
* Reports reference bases as if they are on the {@link Strand#POSITIVE} strand.
* @param variant The {@link VariantContext} associated with this annotation.
* @param altSymbolicAllele The alternate symbolic {@link Allele} associated with this annotation.
* @param annotationTranscript The transcript in which this variant occurs.
* @param reference The {@link ReferenceContext} in which the given {@link Allele}s appear.
* @param variantClassification A {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification} to give to the resulting {@link GencodeFuncotation}.
* @return An appropriate funcotation for the given symbolic allele.
*/
private GencodeFuncotation createFuncotationForSymbolicAltAllele(final VariantContext variant,
final Allele altSymbolicAllele,
final GencodeGtfTranscriptFeature annotationTranscript,
final ReferenceContext reference,
final GencodeFuncotation.VariantClassification variantClassification) {

logger.warn("Cannot create complete funcotation for variant at " + variant.getContig() + ":" + variant.getStart() + "-" + variant.getEnd() + " due to alternate allele: " + altSymbolicAllele.toString());

Expand All @@ -2430,7 +2475,7 @@ private GencodeFuncotation createFuncotationForSymbolicAltAllele(final VariantCo

final String alleleString = altSymbolicAllele.getBaseString().isEmpty() ? altSymbolicAllele.toString() : altSymbolicAllele.getBaseString();

funcotationBuilder.setVariantClassification( GencodeFuncotation.VariantClassification.COULD_NOT_DETERMINE )
funcotationBuilder.setVariantClassification( variantClassification )
.setRefAllele( variant.getReference() )
.setStrand(Strand.POSITIVE)
.setTumorSeqAllele2( alleleString )
Expand Down
Loading

0 comments on commit 6beef4f

Please sign in to comment.