Skip to content

Commit

Permalink
Base modifications -- validate seq length vs MN tag. Also, in the abs…
Browse files Browse the repository at this point in the history
…ence of MN tag validate # of required bases from MM tag vs seq length.
  • Loading branch information
jrobinso committed Nov 29, 2023
1 parent 518045e commit efeeb6d
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 38 deletions.
69 changes: 56 additions & 13 deletions src/main/java/org/broad/igv/sam/SAMAlignment.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.util.SequenceUtil;
import org.broad.igv.logging.*;
import org.broad.igv.Globals;
import org.broad.igv.feature.Strand;
Expand Down Expand Up @@ -121,20 +122,22 @@ public class SAMAlignment implements Alignment {
private List<BaseModificationSet> baseModificationSets;
private SMRTKinetics smrtKinetics;

private enum CacheKey {CLIPPING_COUNTS, SA_GROUP};
private enum CacheKey {CLIPPING_COUNTS, SA_GROUP}

;

/**
* Use this to cache an expensive to compute value on this record and retrieve it as necessary.
*/
@SuppressWarnings("unchecked")
private <T> T getCachedOrCompute(CacheKey key, Supplier<T> supplier){
private <T> T getCachedOrCompute(CacheKey key, Supplier<T> supplier) {
final Object value = record.getTransientAttribute(key);
if (value == null) {
final T newValue = supplier.get();
record.setTransientAttribute(key, newValue);
return newValue;
} else {
return (T)value;
return (T) value;
}
}

Expand Down Expand Up @@ -287,9 +290,9 @@ public Cigar getCigar() {
}

@Override
public ClippingCounts getClippingCounts(){
return getCachedOrCompute(CacheKey.CLIPPING_COUNTS,
() -> ClippingCounts.fromCigar(record.getCigar()));
public ClippingCounts getClippingCounts() {
return getCachedOrCompute(CacheKey.CLIPPING_COUNTS,
() -> ClippingCounts.fromCigar(record.getCigar()));
}

public String getReadSequence() {
Expand Down Expand Up @@ -367,12 +370,23 @@ public List<BaseModificationSet> getBaseModificationSets() {

Object mm = record.hasAttribute("Mm") ? record.getAttribute("Mm") : record.getAttribute("MM");
byte[] ml = (byte[]) (record.hasAttribute("Ml") ? record.getAttribute("Ml") : record.getAttribute("ML"));
Integer mn = record.getIntegerAttribute("MN");

// Minimal tag validation
if(mm instanceof String && (ml == null || ml instanceof byte [])) {
// Minimal tag validation -- 10X uses MM and/or ML for other purposes
if (mm instanceof String && (mm.toString().length() > 0) && (ml == null || ml instanceof byte[])) {

byte[] sequence = record.getReadBases();

// Sequence length validation -- if MN tag is present use it, otherwise do a partial validation
if (mn != null) {
if (mn != record.getReadBases().length) {
return null;
}
} else if (!validateMMTag(mm.toString(), sequence)) { //record.getCigarString().indexOf("H") > 0 &&
return null;
}


if (mm.toString().length() == 0) { // TODO -- more extensive validation?
baseModificationSets = Collections.EMPTY_LIST;
} else {
Expand All @@ -383,6 +397,35 @@ public List<BaseModificationSet> getBaseModificationSets() {
return baseModificationSets;
}

/**
* Validate an MM tag against the sequence length. This will not catch all mismatches, but will many.
*
* @param mm - an MM tag, e.g. C+m,5,12,0 => at least 20 "Cs", 3 with modifications and 17 skipped
* @return
*/
boolean validateMMTag(String mm, byte[] sequence) {
String[] mmTokens = mm.split(";");
for (String mmi : mmTokens) {
String[] tokens = mmi.split(","); //Globals.commaPattern.split(mm);
byte base = (byte) tokens[0].charAt(0);
char strand = tokens[0].charAt(1);
if (strand == '-') {
base = SequenceUtil.complement(base);
}
int baseCount = 0;
for (int i = 0; i < sequence.length; i++) if (sequence[i] == base) baseCount++;
// Count # of bases implied by tag
int modified = tokens.length - 1;
int skipped = 0;
for (int i = 1; i < tokens.length; i++) skipped += Integer.parseInt(tokens[i]);
if (modified + skipped > baseCount) {
System.out.println("m+skipped = " + (modified + skipped) + " base count = " + baseCount);
return false;
}
}
return true;
}

public SMRTKinetics getSmrtKinetics() {
if (smrtKinetics == null) {
smrtKinetics = new SMRTKinetics(this);
Expand Down Expand Up @@ -794,7 +837,7 @@ public String getAlignmentValueString(double position, int mouseX, AlignmentTrac
// Identify the number of hard and soft clipped bases.
ClippingCounts clipping = getClippingCounts();

if (!clipping.isClipped()){
if (!clipping.isClipped()) {
buf.append("None");
} else {
if (clipping.isLeftClipped()) {
Expand Down Expand Up @@ -978,8 +1021,8 @@ private String getSupplAlignmentString() {
final List<SupplementaryAlignment> supplementaryAlignments = getSupplementaryAlignments();
final int insertionIndex = SupplementaryAlignment.getInsertionIndex(this, supplementaryAlignments);
int i = 0;
for (SupplementaryAlignment sa: supplementaryAlignments) {
if(i == insertionIndex) { //Add this read into the list
for (SupplementaryAlignment sa : supplementaryAlignments) {
if (i == insertionIndex) { //Add this read into the list
sb.append(getThisReadDescriptionForSAList());
}
i++;
Expand All @@ -989,7 +1032,7 @@ private String getSupplAlignmentString() {
sb.append("<br>* Invalid SA entry (not listed) *");
}
}
if(i == insertionIndex){
if (i == insertionIndex) {
sb.append(getThisReadDescriptionForSAList());
}
return sb.toString();
Expand Down Expand Up @@ -1211,7 +1254,7 @@ public int getHapDistance() {
return hapDistance;
}

public List<SupplementaryAlignment> getSupplementaryAlignments(){
public List<SupplementaryAlignment> getSupplementaryAlignments() {
return getCachedOrCompute(CacheKey.SA_GROUP,
() -> {
Object rawSAValue = this.getAttribute(SAMTag.SA);
Expand Down
25 changes: 0 additions & 25 deletions test/sessions/base_mods/megladon_newstyle_session.xml

This file was deleted.

0 comments on commit efeeb6d

Please sign in to comment.