Skip to content

Commit

Permalink
v1.3.6
Browse files Browse the repository at this point in the history
  • Loading branch information
hartleys committed Sep 25, 2018
1 parent 39cd1fc commit 099881f
Show file tree
Hide file tree
Showing 20 changed files with 1,858 additions and 97 deletions.
Binary file modified QoRTs-vignette.pdf
Binary file not shown.
Binary file modified QoRTs.jar
Binary file not shown.
Binary file added QoRTs_1.3.6.tar.gz
Binary file not shown.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# QoRTs v1.3.0
(Compiled Fri Oct 20 11:56:37 EDT 2017)
# QoRTs v1.3.6
(Compiled Tue Sep 25 11:21:46 EDT 2018)

The [QoRTs software package](http://hartleys.github.io/QoRTs/) is a fast, efficient, and portable
multifunction toolkit designed to assist in
Expand Down
Binary file modified example-walkthrough.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions src/HartleyUtils/assembleHSU.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ export _JAVA_OPTIONS="-Xmx1000M -Xms500M -XX:ParallelGCThreads=1"
export MALLOC_ARENA_MAX=1

sbt < sbtAssemblyCommand.txt

cp target/scala-2.12/QoRTs.jar ./
6 changes: 2 additions & 4 deletions src/HartleyUtils/build.sbt
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import AssemblyKeys._

assemblySettings

scalaVersion := "2.12.4"

scalacOptions := Seq("-unchecked", "-deprecation")
scalacOptions := Seq("-unchecked", "-deprecation", "-feature")

jarName in assembly := "QoRTs.jar"
assemblyJarName in assembly := "QoRTs.jar"

mainClass in assembly := Some("runner.runner")

3 changes: 3 additions & 0 deletions src/HartleyUtils/project/assembly.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")

1 change: 1 addition & 0 deletions src/HartleyUtils/project/build.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sbt.version=1.1.4
2 changes: 1 addition & 1 deletion src/HartleyUtils/project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
resolvers += Resolver.url("artifactory", url("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases"))(Resolver.ivyStylePatterns)

addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2")
//addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2")
3 changes: 3 additions & 0 deletions src/HartleyUtils/sbtAssemblyCommand.txt
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
assembly

exit

Large diffs are not rendered by default.

61 changes: 61 additions & 0 deletions src/HartleyUtils/src/main/scala/internalUtils/Reporter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ object Reporter {
}
}


/*
* Do not use the below functions:
*/
Expand Down Expand Up @@ -238,6 +239,31 @@ object Reporter {
}
}

var PROGRESS_NEEDS_NEWLINE = true;
def progressReport(i : Int, s : String, verb : String = "progress"){
report("] " + (s +: getProgressReportStrings(i).map{s => " "+s} ).mkString("\n")+"\n",verb=verb);
}
def progressDot(i : Int, dotsPerGroup : Int = 5, groupsPerLine : Int = 4, blankSpacer : String = "-", verb : String = "progress"){
if(PROGRESS_NEEDS_NEWLINE){
startProgressLine(i-1,dotsPerGroup = dotsPerGroup, groupsPerLine = groupsPerLine, verb=verb,spacer = blankSpacer);
PROGRESS_NEEDS_NEWLINE = false;
}
if(i != (groupsPerLine * dotsPerGroup) && i % dotsPerGroup == 0){
report(". ",verb);
} else {
report(".",verb);
}

}
def startProgressLine(blankSpaces : Int, spacer : String = "x", groupSpacer : String = " ", dotsPerGroup : Int = 5, groupsPerLine : Int = 4, verb : String = "progress"){
var out = "[";
var grpNum = blankSpaces / dotsPerGroup;
out = out + stdUtils.repString(stdUtils.repString(spacer,dotsPerGroup)+groupSpacer,grpNum);
out = out + stdUtils.repString(spacer,blankSpaces % dotsPerGroup);
report(out,verb);
}


/*
* Reporting options:
*/
Expand Down Expand Up @@ -285,6 +311,17 @@ object Reporter {
}
warningCount(warnType) += 1;
}
val noticeCount = scala.collection.mutable.Map[String,Int]().withDefault((x : String) => 0);
def notice(str : String, warnType : String = "default", limit : Int = -1){
val warnCt = noticeCount(warnType)
if(limit < 0 || warnCt <= limit){
PROGRESS_NEEDS_NEWLINE = true;
reportln(" #### NOTE ("+warnType+" "+(warnCt+1)+"):","warn");
reportln(" >> "+str.split("\n").mkString("\n >> "),"warn");
if(limit > 0 && warnCt == limit) reportln(" (("+limit+"+ notices of type "+warnType+". Further warnings of this type will be silent.))","warn");
}
noticeCount(warnType) += 1;
}

def error(str : String){
reportln("<====== FATAL ERROR! ======>","error");
Expand Down Expand Up @@ -320,7 +357,31 @@ object Reporter {
})
reportln("<------->","warn");
}
if(! noticeCount.keys.isEmpty){
reportln("<------->","note");
reportln(" Note: "+noticeCount.keySet.map(noticeCount(_)).sum+" Notices Thrown:","note");
noticeCount.keySet.foreach(x => {
reportln(" "+noticeCount(x)+"\t"+x,"note");
})
reportln("<------->","note");
}
loggers.map((logger) => logger.close);
loggers = List();
}

var PROGRESS_REPORT_FUNCTIONS : Vector[(Int) => String] = Vector();

def getProgressReportStrings(i : Int) : Vector[String] = {
PROGRESS_REPORT_FUNCTIONS.map{f => {
f(i)
}}
}
def addProgressReportFunction(f : (Int) => String){
PROGRESS_REPORT_FUNCTIONS = PROGRESS_REPORT_FUNCTIONS :+ f;
}
def clearProgressReportFunctions(){
PROGRESS_REPORT_FUNCTIONS = Vector()
}


}
60 changes: 48 additions & 12 deletions src/HartleyUtils/src/main/scala/internalUtils/commonSeqUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -850,33 +850,35 @@ object commonSeqUtils {

//}

def samRecordPairIterator_resorted(iter : Iterator[SAMRecord], verbose : Boolean = true, testCutoff : Int = -1, ignoreSecondary : Boolean = true) : Iterator[(SAMRecord,SAMRecord)] = {

def samRecordPairIterator_resorted(iter : Iterator[SAMRecord], verbose : Boolean = true, testCutoff : Int = -1, ignoreSecondary : Boolean = true, prefilterImproperPairs : Boolean = false) : Iterator[(SAMRecord,SAMRecord)] = {
val keepImproperPairs : Boolean = ! prefilterImproperPairs;
if(ignoreSecondary){
presetProgressReporters.wrapIterator_readPairs(getSRPairIterResorted(iter.filter((read : SAMRecord) => {
(! read.getNotPrimaryAlignmentFlag()) && (! getMateUnmappedFlag(read)) && (! read.getReadUnmappedFlag())
(keepImproperPairs || (read.getReadPairedFlag() && read.getProperPairFlag())) && (! read.getNotPrimaryAlignmentFlag()) && (! getMateUnmappedFlag(read)) && (! read.getReadUnmappedFlag())
})), verbose, testCutoff);
} else {
error("FATAL ERROR: Using non-primary read mappings is not currently implemented!");
return null;
}
}

def samRecordPairIterator_unsorted(iter : Iterator[SAMRecord], verbose : Boolean = true, testCutoff : Int = -1, ignoreSecondary : Boolean = true) : Iterator[(SAMRecord,SAMRecord)] = {

def samRecordPairIterator_unsorted(iter : Iterator[SAMRecord], verbose : Boolean = true, testCutoff : Int = -1, ignoreSecondary : Boolean = true, prefilterImproperPairs : Boolean = false) : Iterator[(SAMRecord,SAMRecord)] = {
val keepImproperPairs : Boolean = ! prefilterImproperPairs;
if(ignoreSecondary){
presetProgressReporters.wrapIterator_readPairs(getSRPairIterUnsorted(iter.filter((read : SAMRecord) => {
(! read.getNotPrimaryAlignmentFlag()) && (! getMateUnmappedFlag(read)) && (! read.getReadUnmappedFlag())
(keepImproperPairs || (read.getReadPairedFlag() && read.getProperPairFlag())) && (! read.getNotPrimaryAlignmentFlag()) && (! getMateUnmappedFlag(read)) && (! read.getReadUnmappedFlag())
})), verbose, testCutoff);
} else {
error("FATAL ERROR: Using non-primary read mappings is not currently implemented!");
return null;
}
}
def samRecordPairIterator_withMulti(iter : Iterator[SAMRecord], verbose : Boolean = true, testCutoff : Int = -1, ignoreSecondary : Boolean = true) : Iterator[(SAMRecord,SAMRecord)] = {
def samRecordPairIterator_withMulti(iter : Iterator[SAMRecord], verbose : Boolean = true, testCutoff : Int = -1, ignoreSecondary : Boolean = true, prefilterImproperPairs : Boolean = false) : Iterator[(SAMRecord,SAMRecord)] = {
val keepImproperPairs : Boolean = ! prefilterImproperPairs;

if(ignoreSecondary){
presetProgressReporters.wrapIterator_readPairs(getSRPairIter(iter.filter((read : SAMRecord) => {
(! read.getNotPrimaryAlignmentFlag()) && (! getMateUnmappedFlag(read)) && (! read.getReadUnmappedFlag())
(keepImproperPairs || (read.getReadPairedFlag() && read.getProperPairFlag())) && (! read.getNotPrimaryAlignmentFlag()) && (! getMateUnmappedFlag(read)) && (! read.getReadUnmappedFlag())
})), verbose, testCutoff);
} else {
error("FATAL ERROR: Using non-primary read mappings is not currently implemented!");
Expand Down Expand Up @@ -986,6 +988,12 @@ object commonSeqUtils {

while((! pairContainer.contains(curr.getReadName())) && iter.hasNext) {
pairContainer(curr.getReadName()) = curr;
if(curr.getReadPairedFlag() && (! curr.getProperPairFlag())){
warning("NOTE: qorts has detected a read flagged \"improperly paired\"! Some aligners allow \n"+
"paired reads to match to different chromosomes, \n"+
"which can cause errors in QoRTs as it attempts to perform a pairwise coordinate sort. \n"+
"You might need to rerun QoRTs with the --prefilterImproperPairs parameter!","IMPROPER_PAIR",1);
}
if(pairContainerWarningSize < pairContainer.size){
reportln("NOTE: Unmatched Read Buffer Size > "+pairContainerWarningSize+" [Mem usage:"+MemoryUtil.memInfo+"]","note");
if(pairContainerWarningSize == initialPairContainerWarningSize){
Expand Down Expand Up @@ -1016,7 +1024,7 @@ object commonSeqUtils {
}


private def getSRPairIterResorted(iter : Iterator[SAMRecord]) : Iterator[(SAMRecord,SAMRecord)] = {
private def getSRPairIterResorted(iter : Iterator[SAMRecord], strict : Boolean = true) : Iterator[(SAMRecord,SAMRecord)] = {
reportln("Starting getSRPairIterResorted...","debug");

val initialPairContainerWarningSize = 100000;
Expand All @@ -1037,6 +1045,7 @@ object commonSeqUtils {
def bufferHasNext : Boolean = iter.hasNext;
def addNextPairToBuffer {
var curr = iter.next;
var searchCt = 0;
while((! pairContainer.contains(curr.getReadName())) && iter.hasNext) {
readOrder = readOrder :+ curr.getReadName();
pairContainer(curr.getReadName()) = curr;
Expand All @@ -1054,11 +1063,26 @@ object commonSeqUtils {
}
pairContainerWarningSize = pairContainerWarningSize * warningSizeMultiplier;
}
searchCt = searchCt + 1;
curr = iter.next;
}

if(! pairContainer.contains(curr.getReadName()) ){
internalUtils.Reporter.error("ERROR ERROR ERROR: Reached end of bam file, there are "+(pairContainer.size+1)+" orphaned reads, which are marked as having a mapped pair, but no corresponding pair is found in the bam file. \n(Example Orphaned Read Name: "+curr.getReadName()+")");
if((! pairContainer.contains(curr.getReadName())) ){
if(strict){
internalUtils.Reporter.error("ERROR ERROR ERROR (636): Reached end of bam file, there are "+(pairContainer.size+1)+" orphaned reads, "+
"which are marked as having a mapped pair, but no "+
"corresponding pair is found in the bam file. \n"+
"(Example Orphaned Read Name: "+curr.getReadName()+")\n"+
"(Read line: "+curr.getSAMString()+")"
);
} else {
internalUtils.Reporter.warning("ERROR ERROR ERROR (636): Reached end of bam file, there are "+(pairContainer.size+1)+" orphaned reads, "+
"which are marked as having a mapped pair, but no "+
"corresponding pair is found in the bam file. \n"+
"(Example Orphaned Read Name: "+curr.getReadName()+")\n"+
"(Read line: "+curr.getSAMString()+")","UNPAIRED_READ",-1
);
}
}
val rB = pairContainer.remove(curr.getReadName()).get;

Expand All @@ -1074,11 +1098,22 @@ object commonSeqUtils {
val nextName = readOrder.head;
readOrder = readOrder.tail;
if(buffer.contains(nextName)) return buffer.remove(nextName).get;
var searchCt = 0;

while(iter.hasNext && (! buffer.contains(nextName))){
addNextPairToBuffer;
if(bufferWarningSize < buffer.size){
reportln("NOTE: Unmatched Read-PAIR-Buffer Size > "+bufferWarningSize+" [Mem usage:"+MemoryUtil.memInfo+"]","note");
val nextSamRead = pairContainer(nextName);
reportln("NOTE: Unsorted Read-PAIR-Buffer Size > "+bufferWarningSize+" [Mem usage:"+MemoryUtil.memInfo+"]\n"+
" Currently searching for read: " + nextName + " for "+searchCt + " iterations."+
" Searching for read: "+nextName+" "+nextSamRead.getReferenceName()+":"+nextSamRead.getAlignmentStart()+"-"+nextSamRead.getAlignmentEnd+" "+nextSamRead.getFlags()+"\n"+
" Current unmatched-pair-buffer status: "+pairContainer.size,"note");
if( nextSamRead.getReadPairedFlag() && (! nextSamRead.getProperPairFlag()) ){
reportln("NOTE: current-search read is flagged \"improperly paired\"! Some aligners allow \n"+
"paired reads to match to different chromosomes, \n"+
"which can cause errors in QoRTs as it attempts to perform a pairwise coordinate sort. \n"+
"You might need to rerun QoRTs with the --prefilterImproperPairs parameter!","warn");
}
if(bufferWarningSize == initialPairContainerWarningSize){
reportln(" (This is generally not a problem, but if this increases further then OutOfMemoryExceptions\n"+
" may occur.\n"+
Expand All @@ -1091,6 +1126,7 @@ object commonSeqUtils {
}
bufferWarningSize = bufferWarningSize * warningSizeMultiplier;
}
searchCt = searchCt + 1;
}
if(!buffer.contains(nextName)){
internalUtils.Reporter.error("ERROR ERROR ERROR: Reached end of bam file, there are "+(pairContainer.size+1)+" orphaned reads, which are marked as having a mapped pair, but no corresponding pair is found in the bam file. \n(Example Orphaned Read Name: "+nextName+")");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ object genomicAnnoUtils {
error("ERROR: EfficientGenomeSeqContainer: requested sequence for chromosome other than current chromosome!");
}

if(start < 0){
if(end < 0){
return repString("N",end-start);
} else {
return repString("N",-start) + getSeqForInterval(chrom=chrom,start=0,end=end,truncate=truncate);
}
}

if(start < bufferStart){
reportln("ERROR: EfficientGenomeSeqContainer: Illegal request for sequence prior to current buffer limits!\n"+
" request: "+chrom+":"+start+"-"+end,"note");
Expand Down Expand Up @@ -158,35 +166,55 @@ object genomicAnnoUtils {
//Implementation note: It is important that you never read from remainderIter without first emptying currentIter!
// Otherwise scala attempts to store the currentIter values.
// This even remains true if there are no external references to the attached currentIter.
def parseChromLine(line : String) : String = {
if(line.head != '>'){
error("genome fasta file \""+infile+"\" appears mal-formatted! Chromsome line does not start with '>'!:\nOffending line: \""+line+"\"")
}
line.tail.split("\\s+",2)(0)
}

def switchToChrom(chrom : String){
report("Switching to Chromosome: " + chrom + " ["+getDateAndTimeString+"] ... ","debug");
while(currentIter.hasNext) currentIter.next;
var iter = remainderIter.dropWhile(line => line != (">"+chrom));
if(! iter.hasNext){
iter = internalUtils.fileUtils.getLinesSmartUnzip(infile).dropWhile(line => line != (">"+chrom));
if(iter.hasNext){
reportln("Returning to start of genome FASTA file. NOTE: for optimal performance, sort the FASTA file so that the chromosomes appear in the same order as in the BAM files.","note");
} else {
error("FATAL ERROR: Cannot find chromosome \""+chrom+"\" in genome FASTA file!")
}
}
val iterPair = iter.drop(1).span(line => line.charAt(0) != '>');
currentIter = iterPair._1.map(_.toUpperCase());
remainderIter = iterPair._2;
clearBuffer();
currChrom = chrom;
report("done ["+getDateAndTimeString+"]\n","debug");
switchToChrom(chrom,true);
report(" found chrom "+chrom+" ["+getDateAndTimeString+"]\n","debug");
}
def switchToChrom(chrom : String,circleBack : Boolean){
while(chrom != currChrom && reader.hasNext){
reportln(" Skipping chrom \""+currChrom+"\" in genome fasta...","debug");
skipWhile(reader)(line => line.charAt(0) != '>')
if(reader.hasNext){
currChrom = parseChromLine(reader.next);
if(chrom != currChrom) reportln(" Skipping chrom \""+currChrom+"\" in genome fasta...","debug");
}
}
if(circleBack && currChrom != chrom){
notice("Returning to start of genome FASTA file. NOTE: for optimal performance, sort the FASTA file so that the chromosomes appear in the same order as in the BAM files.","LOOPED_GENOME_FASTA",-1);
reader = internalUtils.fileUtils.getLinesSmartUnzip(infile).buffered
currChrom = parseChromLine(reader.next);
switchToChrom(chrom,false);
}
}

var initialReader = internalUtils.fileUtils.getLinesSmartUnzip(infile);
currChrom = initialReader.next().substring(1);
var reader = internalUtils.fileUtils.getLinesSmartUnzip(infile).buffered;
if(! reader.hasNext){
error("genome fasta file \""+infile+"\" appears to be empty!")
}
if(reader.head.head != '>'){
error("genome fasta file \""+infile+"\" appears mal-formatted! First line does not start with '>'!:\nOffending line: \""+reader+"\"")
}
currChrom = reader.next().substring(1);
chromList = List[String](currChrom);

var (currentIter,remainderIter) = initialReader.span(line => line.charAt(0) != '>');
currentIter = currentIter.map(_.toUpperCase());
val currentIter = new BufferedIterator[String]{
def hasNext = reader.hasNext && reader.head.charAt(0) != '>';
def next = reader.next.toUpperCase();
def head = reader.head;
}
//var (currentIter,remainderIter) = initialReader.span(line => line.charAt(0) != '>');
//currentIter = currentIter.map(_.toUpperCase());
def currIter = currentIter;
}


class EfficientGenomeSeqContainer_FAList(infiles : Seq[String]) extends EfficientGenomeSeqContainer {
val fastaMap : Map[String,String] = infiles.map(infile => {
(internalUtils.fileUtils.getLinesSmartUnzip(infile).next().tail,infile)
Expand Down
Loading

0 comments on commit 099881f

Please sign in to comment.