From efeeb6deebc0efd3217737ef82b2aa87d55a36ba Mon Sep 17 00:00:00 2001 From: jrobinso <933148+jrobinso@users.noreply.github.com> Date: Tue, 28 Nov 2023 20:11:18 -0800 Subject: [PATCH] Base modifications -- validate seq length vs MN tag. Also, in the absence of MN tag validate # of required bases from MM tag vs seq length. --- .../java/org/broad/igv/sam/SAMAlignment.java | 69 +++++++++++++++---- .../base_mods/megladon_newstyle_session.xml | 25 ------- 2 files changed, 56 insertions(+), 38 deletions(-) delete mode 100644 test/sessions/base_mods/megladon_newstyle_session.xml diff --git a/src/main/java/org/broad/igv/sam/SAMAlignment.java b/src/main/java/org/broad/igv/sam/SAMAlignment.java index 42d536c155..f7b04db02d 100644 --- a/src/main/java/org/broad/igv/sam/SAMAlignment.java +++ b/src/main/java/org/broad/igv/sam/SAMAlignment.java @@ -34,6 +34,7 @@ import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMTag; +import htsjdk.samtools.util.SequenceUtil; import org.broad.igv.logging.*; import org.broad.igv.Globals; import org.broad.igv.feature.Strand; @@ -121,20 +122,22 @@ public class SAMAlignment implements Alignment { private List baseModificationSets; private SMRTKinetics smrtKinetics; - private enum CacheKey {CLIPPING_COUNTS, SA_GROUP}; + private enum CacheKey {CLIPPING_COUNTS, SA_GROUP} + + ; /** * Use this to cache an expensive to compute value on this record and retrieve it as necessary. */ @SuppressWarnings("unchecked") - private T getCachedOrCompute(CacheKey key, Supplier supplier){ + private T getCachedOrCompute(CacheKey key, Supplier supplier) { final Object value = record.getTransientAttribute(key); if (value == null) { final T newValue = supplier.get(); record.setTransientAttribute(key, newValue); return newValue; } else { - return (T)value; + return (T) value; } } @@ -287,9 +290,9 @@ public Cigar getCigar() { } @Override - public ClippingCounts getClippingCounts(){ - return getCachedOrCompute(CacheKey.CLIPPING_COUNTS, - () -> ClippingCounts.fromCigar(record.getCigar())); + public ClippingCounts getClippingCounts() { + return getCachedOrCompute(CacheKey.CLIPPING_COUNTS, + () -> ClippingCounts.fromCigar(record.getCigar())); } public String getReadSequence() { @@ -367,12 +370,23 @@ public List getBaseModificationSets() { Object mm = record.hasAttribute("Mm") ? record.getAttribute("Mm") : record.getAttribute("MM"); byte[] ml = (byte[]) (record.hasAttribute("Ml") ? record.getAttribute("Ml") : record.getAttribute("ML")); + Integer mn = record.getIntegerAttribute("MN"); - // Minimal tag validation - if(mm instanceof String && (ml == null || ml instanceof byte [])) { + // Minimal tag validation -- 10X uses MM and/or ML for other purposes + if (mm instanceof String && (mm.toString().length() > 0) && (ml == null || ml instanceof byte[])) { byte[] sequence = record.getReadBases(); + // Sequence length validation -- if MN tag is present use it, otherwise do a partial validation + if (mn != null) { + if (mn != record.getReadBases().length) { + return null; + } + } else if (!validateMMTag(mm.toString(), sequence)) { //record.getCigarString().indexOf("H") > 0 && + return null; + } + + if (mm.toString().length() == 0) { // TODO -- more extensive validation? baseModificationSets = Collections.EMPTY_LIST; } else { @@ -383,6 +397,35 @@ public List getBaseModificationSets() { return baseModificationSets; } + /** + * Validate an MM tag against the sequence length. This will not catch all mismatches, but will many. + * + * @param mm - an MM tag, e.g. C+m,5,12,0 => at least 20 "Cs", 3 with modifications and 17 skipped + * @return + */ + boolean validateMMTag(String mm, byte[] sequence) { + String[] mmTokens = mm.split(";"); + for (String mmi : mmTokens) { + String[] tokens = mmi.split(","); //Globals.commaPattern.split(mm); + byte base = (byte) tokens[0].charAt(0); + char strand = tokens[0].charAt(1); + if (strand == '-') { + base = SequenceUtil.complement(base); + } + int baseCount = 0; + for (int i = 0; i < sequence.length; i++) if (sequence[i] == base) baseCount++; + // Count # of bases implied by tag + int modified = tokens.length - 1; + int skipped = 0; + for (int i = 1; i < tokens.length; i++) skipped += Integer.parseInt(tokens[i]); + if (modified + skipped > baseCount) { + System.out.println("m+skipped = " + (modified + skipped) + " base count = " + baseCount); + return false; + } + } + return true; + } + public SMRTKinetics getSmrtKinetics() { if (smrtKinetics == null) { smrtKinetics = new SMRTKinetics(this); @@ -794,7 +837,7 @@ public String getAlignmentValueString(double position, int mouseX, AlignmentTrac // Identify the number of hard and soft clipped bases. ClippingCounts clipping = getClippingCounts(); - if (!clipping.isClipped()){ + if (!clipping.isClipped()) { buf.append("None"); } else { if (clipping.isLeftClipped()) { @@ -978,8 +1021,8 @@ private String getSupplAlignmentString() { final List supplementaryAlignments = getSupplementaryAlignments(); final int insertionIndex = SupplementaryAlignment.getInsertionIndex(this, supplementaryAlignments); int i = 0; - for (SupplementaryAlignment sa: supplementaryAlignments) { - if(i == insertionIndex) { //Add this read into the list + for (SupplementaryAlignment sa : supplementaryAlignments) { + if (i == insertionIndex) { //Add this read into the list sb.append(getThisReadDescriptionForSAList()); } i++; @@ -989,7 +1032,7 @@ private String getSupplAlignmentString() { sb.append("
* Invalid SA entry (not listed) *"); } } - if(i == insertionIndex){ + if (i == insertionIndex) { sb.append(getThisReadDescriptionForSAList()); } return sb.toString(); @@ -1211,7 +1254,7 @@ public int getHapDistance() { return hapDistance; } - public List getSupplementaryAlignments(){ + public List getSupplementaryAlignments() { return getCachedOrCompute(CacheKey.SA_GROUP, () -> { Object rawSAValue = this.getAttribute(SAMTag.SA); diff --git a/test/sessions/base_mods/megladon_newstyle_session.xml b/test/sessions/base_mods/megladon_newstyle_session.xml deleted file mode 100644 index e8b0efa20f..0000000000 --- a/test/sessions/base_mods/megladon_newstyle_session.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - -