Skip to content

Commit

Permalink
SVAnnotate: Functional annotations for SVs called by GATK-SV (#7431)
Browse files Browse the repository at this point in the history
  • Loading branch information
epiercehoffman authored Mar 9, 2022
1 parent c1190ac commit 1c749b3
Show file tree
Hide file tree
Showing 19 changed files with 2,200 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ public final class GATKSVVCFConstants {
public static final String CPX_EVENT_ALT_ARRANGEMENTS = "ALT_ARRANGEMENT";
public static final String CPX_SV_REF_SEGMENTS = "SEGMENTS";
public static final String CPX_EVENT_KEY = "CPX_EVENT";
public static final String CPX_INTERVALS = "CPX_INTERVALS";
public static final String CPX_TYPE = "CPX_TYPE";

// not defined in output vcf header but used in internal id that is currently output in the ID column
public static final String INTERVAL_VARIANT_ID_FIELD_SEPARATOR = "_";
Expand All @@ -86,6 +88,8 @@ public final class GATKSVVCFConstants {
public static final String DEPTH_ALGORITHM = "depth";
public static final String CONTIG2_ATTRIBUTE = "CHR2";
public static final String END2_ATTRIBUTE = "END2";
// Two-character strand strings, one per breakend-style event named in the constant:
// "+-" for the deletion case and "-+" for the duplication case.
// NOTE(review): presumably compared against a per-record strands attribute - confirm against callers.
public static final String BND_DELETION_STRANDS = "+-";
public static final String BND_DUPLICATION_STRANDS = "-+";

// format block
public static final String COPY_NUMBER_FORMAT = "CN";
Expand All @@ -101,6 +105,23 @@ public final class GATKSVVCFConstants {
// Clustering
public static final String CLUSTER_MEMBER_IDS_KEY = "MEMBERS";

// functional annotations
// String names for each predicted functional consequence category; every value carries the
// "PREDICTED_" prefix followed by the category name.
// NOTE(review): presumably used as VCF INFO keys written by the SV annotation tool - confirm against callers.
public static final String LOF = "PREDICTED_LOF";
public static final String INT_EXON_DUP = "PREDICTED_INTRAGENIC_EXON_DUP";
public static final String COPY_GAIN = "PREDICTED_COPY_GAIN";
public static final String DUP_PARTIAL = "PREDICTED_DUP_PARTIAL";
public static final String PARTIAL_EXON_DUP = "PREDICTED_PARTIAL_EXON_DUP";
public static final String INTRONIC = "PREDICTED_INTRONIC";
public static final String INV_SPAN = "PREDICTED_INV_SPAN";
public static final String UTR = "PREDICTED_UTR";
public static final String MSV_EXON_OVERLAP = "PREDICTED_MSV_EXON_OVERLAP";
public static final String PROMOTER = "PREDICTED_PROMOTER";
// Note: the constant is named BREAKEND_EXON but its value ends in "EXONIC".
public static final String BREAKEND_EXON = "PREDICTED_BREAKEND_EXONIC";
public static final String INTERGENIC = "PREDICTED_INTERGENIC";
public static final String NONCODING_SPAN = "PREDICTED_NONCODING_SPAN";
public static final String NONCODING_BREAKPOINT = "PREDICTED_NONCODING_BREAKPOINT";
public static final String NEAREST_TSS = "PREDICTED_NEAREST_TSS";
public static final String TSS_DUP = "PREDICTED_TSS_DUP";

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package org.broadinstitute.hellbender.tools.spark.sv.utils;

import htsjdk.samtools.*;
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFConstants;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.spark.utils.HopscotchSetSpark;
import org.broadinstitute.hellbender.tools.spark.utils.LongIterator;
import org.broadinstitute.hellbender.utils.SVInterval;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.io.IOUtils;
Expand Down Expand Up @@ -109,6 +111,34 @@ public static Set<String> getCanonicalChromosomes(final String nonCanonicalConti
}
}

/**
 * Looks up the numeric index of a contig by name in a sequence dictionary.
 *
 * @param contigName contig name to look up
 * @param sequenceDictionary dictionary queried through {@code getSequenceIndex}
 * @return the index the dictionary reports for {@code contigName}
 * @throws IllegalArgumentException if the dictionary reports index -1 for {@code contigName}
 */
public static Integer getContigIDFromName(final String contigName, final SAMSequenceDictionary sequenceDictionary) {
    final int sequenceIndex = sequenceDictionary.getSequenceIndex(contigName);
    if (sequenceIndex != -1) {
        return sequenceIndex;
    }
    // An index of -1 is treated as "contig not present", which is reported to the caller as an error.
    throw new IllegalArgumentException("Contig " + contigName + " not in provided contig ID to name map");
}


/**
 * Builds an SVInterval covering the same span as a Locatable.
 * The Locatable is taken to be 1-based and closed, and the resulting SVInterval 1-based and
 * half-open, so the start is passed through unchanged and the end is incremented by one.
 *
 * @param loc Locatable (1-based, closed interval) to convert
 * @param sequenceDictionary dictionary used to translate the contig name into a numeric contig ID
 * @return SVInterval with the numeric contig ID, the same start, and end + 1
 */
public static SVInterval locatableToSVInterval(final Locatable loc,
                                               final SAMSequenceDictionary sequenceDictionary) {
    // Resolve the contig first so an unknown contig fails before any coordinates are read.
    final int contigIndex = getContigIDFromName(loc.getContig(), sequenceDictionary);
    final int start = loc.getStart();
    final int endExclusive = loc.getEnd() + 1;
    return new SVInterval(contigIndex, start, endExclusive);
}

// =================================================================================================================

/** return a good initialCapacity for a HashMap that will hold a given number of elements */
Expand Down
Loading

0 comments on commit 1c749b3

Please sign in to comment.