Skip to content

Commit

Permalink
Support "bedmethyl" format. See issue #1396
Browse files Browse the repository at this point in the history
  • Loading branch information
jrobinso authored Sep 25, 2023
1 parent 9fa2491 commit b948ec7
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 59 deletions.
2 changes: 1 addition & 1 deletion src/main/java/org/broad/igv/feature/FeatureType.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
* @author jrobinso
*/
public enum FeatureType {
OTHER, GENE, PROMOTER, MISC_RNA, REPEAT_REGION, LTR, MUTATION, BED, GAPPED_PEAK, SPLICE_JUNCTION
OTHER, GENE, PROMOTER, MISC_RNA, REPEAT_REGION, LTR, MUTATION, BED, GAPPED_PEAK, SPLICE_JUNCTION, BED_METHYL

}

20 changes: 12 additions & 8 deletions src/main/java/org/broad/igv/feature/FeatureUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ public static Feature getFeatureCenteredAfter(double position, List<? extends Fe
}

int idx = getIndexCenterAfter(position, features);
if (idx < 0 || idx > features.size()-1) {
if (idx < 0 || idx > features.size() - 1) {
return null;
} else {
return features.get(idx);
Expand Down Expand Up @@ -214,10 +214,10 @@ public static Feature getFeatureClosest(double position, List<? extends Feature>
private static int getDistance(final int position, final Feature f) {
final int end = f.getEnd();
final int start = f.getStart();
if(position >= start && position < end){ //within the feature
if (position >= start && position < end) { //within the feature
return 0;
} else if(position >= end){
return position - end +1; //after the feature
} else if (position >= end) {
return position - end + 1; //after the feature
} else {
return start - position; //before the feature
}
Expand Down Expand Up @@ -265,7 +265,7 @@ public static int getIndexBefore(double position, List<? extends Feature> featur
* @param features
* @return
*/
private static int getIndexCenterAfter(double position, List<? extends Feature> features) {
private static int getIndexCenterAfter(double position, List<? extends Feature> features) {

Feature first = features.get(0);
Feature last = features.get(features.size() - 1);
Expand Down Expand Up @@ -387,19 +387,23 @@ public static List<Feature> getAllFeaturesAt(double position,
for (int idx = startIdx; idx < features.size(); idx++) {
Feature feature = features.get(idx);
double start = feature.getStart() - (minWidth / 2);

if (start > position) {
break;
}

double end = feature.getEnd() + (minWidth / 2);

if (position >= start && position <= end) {
if (returnList == null) returnList = new ArrayList();
returnList.add(feature);
}
}

// Sort features by distance from position (features closest to position listed first)
returnList.sort((o1, o2) -> {
double dist1 = Math.abs((o1.getStart() + (o1.getEnd() - o1.getStart()) / 2) - position);
double dist2 = Math.abs((o2.getStart() + (o2.getEnd() - o2.getStart()) / 2) - position);
return dist1 == dist2 ? 0 : dist1 > dist2 ? 1 : -1;
});

return returnList;
}

Expand Down
3 changes: 3 additions & 0 deletions src/main/java/org/broad/igv/feature/tribble/CodecFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ public static FeatureCodec getCodec(ResourceLocator locator, Genome genome) {
return new UCSCGeneTableCodec(UCSCGeneTableCodec.Type.UCSCGENE, genome);
case "genepredext":
return new UCSCGeneTableCodec(UCSCGeneTableCodec.Type.GENEPRED_EXT, genome);
case "bedmethyl":
return new IGVBEDCodec(genome, FeatureType.BED_METHYL);

default:
if (MUTCodec.isMutationAnnotationFile(locator)) {
return new MUTCodec(path, genome);
Expand Down
107 changes: 65 additions & 42 deletions src/main/java/org/broad/igv/feature/tribble/IGVBEDCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.broad.igv.feature.*;
import org.broad.igv.feature.genome.Genome;
import org.broad.igv.ui.color.ColorUtilities;
import org.broad.igv.ui.util.MessageUtils;
import org.broad.igv.util.StringUtils;
import htsjdk.tribble.Feature;

Expand All @@ -45,7 +46,7 @@
* Date: Dec 20, 2009
* Time: 10:15:49 PM
*/
public class IGVBEDCodec extends UCSCCodec<BasicFeature> {
public class IGVBEDCodec extends UCSCCodec<BasicFeature> {

private static final Logger log = LogManager.getLogger(IGVBEDCodec.class);

Expand All @@ -56,6 +57,8 @@ public class IGVBEDCodec extends UCSCCodec<BasicFeature> {
private Genome genome;
private Pattern delimiter = null;

private int maxColumnCount = Integer.MAX_VALUE;

public IGVBEDCodec() {
this(null);
}
Expand All @@ -75,7 +78,7 @@ public BasicFeature decode(String[] tokens) {

// The first 3 columns are non optional for BED. We will relax this
// and only require 2.
int tokenCount = tokens.length;
int tokenCount = Math.min(tokens.length, maxColumnCount);

if (tokenCount < 2) {
return null;
Expand Down Expand Up @@ -180,7 +183,6 @@ public BasicFeature decode(String[] tokens) {
}
}


// Color
if (tokenCount > 8 && featureType != FeatureType.GAPPED_PEAK) {
String colorString = tokens[8];
Expand All @@ -189,47 +191,70 @@ public BasicFeature decode(String[] tokens) {
}
}

// Exons
if (tokenCount > 11) {
createExons(start, tokens, feature, chr, feature.getStrand());
//todo: some refactoring that allows this hack to be removed
if (featureType == FeatureType.SPLICE_JUNCTION) {
SpliceJunctionFeature junctionFeature = (SpliceJunctionFeature) feature;
if (featureType == FeatureType.BED_METHYL && tokenCount > 9) {
// Description from https://www.encodeproject.org/data-standards/wgbs/
// 10. Coverage, or number of reads
// 11. Percentage of reads that show methylation at this position in the genome
Map<String, String> attributes = new LinkedHashMap<>();
if (tokenCount > 9) {
attributes.put("Coverage", tokens[9]);
}
if (tokenCount > 10) {
attributes.put("% of methylated reads", tokens[10]);
}
feature.setAttributes(attributes);
} else {
// Exons
try {
if (tokenCount > 11) {
createExons(start, tokens, feature, chr, feature.getStrand());
//todo: some refactoring that allows this hack to be removed
if (featureType == FeatureType.SPLICE_JUNCTION) {
SpliceJunctionFeature junctionFeature = (SpliceJunctionFeature) feature;

List<Exon> exons = feature.getExons();
List<Exon> exons = feature.getExons();

junctionFeature.setJunctionStart(start + exons.get(0).getLength());
junctionFeature.setJunctionEnd(end - exons.get(1).getLength());
junctionFeature.setJunctionStart(start + exons.get(0).getLength());
junctionFeature.setJunctionEnd(end - exons.get(1).getLength());

}
}
} catch (NumberFormatException e) {
final String message = "Unexpected data in columns 10-12, expected exon count, exon starts, and exon sizes. Found: " +
tokens[9] + " " + tokens[10] + " " + tokens[11] + "<br>Columns > 9 will be ignored";
this.maxColumnCount = 9;
MessageUtils.showMessage(message);
log.warn(message, e);
return feature;
}
}

if (tokenCount > 14 && featureType == FeatureType.GAPPED_PEAK) {
Map<String, String> attributes = new LinkedHashMap<>();
attributes.put("Signal Value", tokens[12]);
attributes.put("pValue (-log10)", tokens[13]);
attributes.put("qValue (-log10)", tokens[14]);
feature.setAttributes(attributes);
} else if (tokenCount > 13 && featureType == FeatureType.SPLICE_JUNCTION) {
try {
String[] startFlanking = tokens[12].split(",");
int[] startFlankingDeptyArray = new int[startFlanking.length];
for (int i = 0; i < startFlanking.length; i++) {
startFlankingDeptyArray[i] = Integer.parseInt(startFlanking[i]);
}
String[] endFlanking = tokens[13].split(",");
int[] endFlankingDeptyArray = new int[endFlanking.length];
for (int i = 0; i < endFlanking.length; i++) {
endFlankingDeptyArray[i] = Integer.parseInt(endFlanking[i]);
if (tokenCount > 14 && featureType == FeatureType.GAPPED_PEAK) {
Map<String, String> attributes = new LinkedHashMap<>();
attributes.put("Signal Value", tokens[12]);
attributes.put("pValue (-log10)", tokens[13]);
attributes.put("qValue (-log10)", tokens[14]);
feature.setAttributes(attributes);
} else if (tokenCount > 13 && featureType == FeatureType.SPLICE_JUNCTION) {
try {
String[] startFlanking = tokens[12].split(",");
int[] startFlankingDeptyArray = new int[startFlanking.length];
for (int i = 0; i < startFlanking.length; i++) {
startFlankingDeptyArray[i] = Integer.parseInt(startFlanking[i]);
}
String[] endFlanking = tokens[13].split(",");
int[] endFlankingDeptyArray = new int[endFlanking.length];
for (int i = 0; i < endFlanking.length; i++) {
endFlankingDeptyArray[i] = Integer.parseInt(endFlanking[i]);
}
((SpliceJunctionFeature) feature).setStartFlankingRegionDepthArray(startFlankingDeptyArray);
((SpliceJunctionFeature) feature).setEndFlankingRegionDepthArray(endFlankingDeptyArray);
} catch (NumberFormatException e) {
log.error("Error parsing flanking array", e);
}
((SpliceJunctionFeature) feature).setStartFlankingRegionDepthArray(startFlankingDeptyArray);
((SpliceJunctionFeature) feature).setEndFlankingRegionDepthArray(endFlankingDeptyArray);
} catch (NumberFormatException e) {
log.error("Error parsing flanking array", e);
}
}

if(isCoding(feature)) {
if (isCoding(feature)) {
FeatureUtils.computeReadingFrames(feature);
}

Expand All @@ -239,9 +264,9 @@ public BasicFeature decode(String[] tokens) {
/**
* Approximate test for a coding feature (transcript). BED format does not specify this explicitly.
* Rules applied are
*
* <p>
* (1) feature has exons
* (2) feature has possible UTRs at both ends
* (2) feature has possible UTRs at both ends
* (3) feature has strand
*
* @param feature
Expand All @@ -267,8 +292,8 @@ public BasicFeature decode(String nextLine) {
}

// Bed files can be tab or whitespace delimited
if(delimiter == null) {
delimiter = trimLine.contains("\t") ? Globals.multiTabPattern : Globals.whitespacePattern;
if (delimiter == null) {
delimiter = trimLine.contains("\t") ? Globals.multiTabPattern : Globals.whitespacePattern;
}

tokens = delimiter.split(trimLine);
Expand Down Expand Up @@ -298,7 +323,7 @@ public boolean canDecode(String path) {


public static void createExons(int start, String[] tokens, BasicFeature gene, String chr,
Strand strand) throws NumberFormatException {
Strand strand) throws NumberFormatException {

int cdStart = Integer.parseInt(tokens[6]);
int cdEnd = Integer.parseInt(tokens[7]);
Expand Down Expand Up @@ -329,8 +354,6 @@ public static void createExons(int start, String[] tokens, BasicFeature gene, St
}




/**
* Encode a feature as a BED string.
*
Expand Down
10 changes: 3 additions & 7 deletions src/main/java/org/broad/igv/track/FeatureTrack.java
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,6 @@ public Renderer getRenderer() {
*/
public String getValueStringAt(String chr, double position, int mouseX, int mouseY, ReferenceFrame frame) {


if (showFeatures) {

List<Feature> allFeatures = getAllFeatureAt(position, mouseY, frame);
Expand All @@ -388,7 +387,7 @@ public String getValueStringAt(String chr, double position, int mouseX, int mous

StringBuffer buf = new StringBuffer();
boolean firstFeature = true;
int maxNumber = 100;
int maxNumber = IGV.getInstance().isShowDetailsOnClick() ? 100 : 10;
int n = 1;
for (Feature feature : allFeatures) {
if (feature != null && feature instanceof IGVFeature) {
Expand All @@ -406,17 +405,14 @@ public String getValueStringAt(String chr, double position, int mouseX, int mous
buf.append("<br/><a href=\"" + url + "\">" + url + "</a>");
}
}

firstFeature = false;

if (n > maxNumber) {
buf.append("...");
buf.append("<hr><br<b>>+ " + (allFeatures.size() - maxNumber) + " more ...</b>");
break;
}
}
n++;
}

return buf.toString();
} else {
int zoom = Math.max(0, frame.getZoom());
Expand Down Expand Up @@ -562,7 +558,7 @@ public List<Feature> getFeaturesAtPositionInFeatureRow(double position, int feat
if (possFeatures != null) {
// give a minum 2 pixel or 1/2 bp window, otherwise very narrow features will be missed.
double bpPerPixel = frame.getScale();
double minWidth = Math.max(0.5, MINIMUM_FEATURE_SPACING * bpPerPixel);
double minWidth = Math.max(2, 3 * bpPerPixel);
int maxFeatureLength = packedFeatures.getMaxFeatureLength();
featureList = FeatureUtils.getAllFeaturesAt(position, maxFeatureLength, minWidth, possFeatures);
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/broad/igv/util/ResourceLocator.java
Original file line number Diff line number Diff line change
Expand Up @@ -649,5 +649,5 @@ public String toString() {
}

static Set<String> knownFormats = new HashSet<>(Arrays.asList("gff", "bed", "gtf", "gff3",
"seg", "bb", "bigbed", "bigwig", "bam", "cram", "vcf"));
"seg", "bb", "bigbed", "bigwig", "bam", "cram", "vcf", "bedmethyl"));
}

0 comments on commit b948ec7

Please sign in to comment.