Merge pull request #9 from stjude/madetunj-update

ROSE file name update
stjude · Apr 18, 2021 · 24a76d5 · 24a76d5
2 parents 6301412 + 0722a77
commit 24a76d5
Show file tree

Hide file tree

Showing 4 changed files with 59 additions and 37 deletions.
diff --git a/bin/ROSE_bamToGFF.py b/bin/ROSE_bamToGFF.py
@@ -116,7 +116,13 @@ def mapBamToGFF(bamFile,gff,sense = 'both',extension = 200,floor = 0,rpm = False
 
 
         #setting up the output table
-        clusterLine = [gffLocus.ID(),gffLocus.__str__()]
+        #clusterLine = [gffLocus.ID(),gffLocus.__str__()]
+
+        # bug fix gff coordinates with same chromosomal name as BAM
+        if not hasChrFlag:
+            clusterLine = [gffLocus.ID(),"chr" + gffLocus.__str__()]
+        else: 
+           clusterLine = [gffLocus.ID(),gffLocus.__str__()]
 
         #getting the binsize
         binSize = (gffLocus.len()-1)/int(matrix)

diff --git a/bin/ROSE_callSuper.R b/bin/ROSE_callSuper.R
@@ -61,7 +61,7 @@ writeSuperEnhancer_table <- function(superEnhancer,description,outputFile,additi
 	cat(description,"\n",file=outputFile)
 	if(is.matrix(additionalData)){
 		if(nrow(additionalData)!=nrow(superEnhancer)){
-			warning("Additional data does not have the same number of rows as the number of super enhancers.\n--->>> ADDITIONAL DATA NOT INCLUDED <<<---\n")
+			warning("Additional data does not have the same number of rows as the number of super stitched peaks.\n--->>> ADDITIONAL DATA NOT INCLUDED <<<---\n")
 		}else{
 			superEnhancer <- cbind(superEnhancer,additionalData)
 			superEnhancer = superEnhancer[order(superEnhancer$enhancerRank),]
@@ -122,7 +122,7 @@ rankBy_vector[rankBy_vector < 0] <- 0
 
 #FIGURING OUT THE CUTOFF
 
-cutoff_options <- calculate_cutoff(rankBy_vector, drawPlot=FALSE,xlab=paste(rankBy_factor,'_enhancers'),ylab=paste(rankByFactor,' Signal','- ',wceName),lwd=2,col=4)
+cutoff_options <- calculate_cutoff(rankBy_vector, drawPlot=FALSE,xlab=paste(rankBy_factor,' Stitched peaks'),ylab=paste(rankByFactor,' Signal','- ',wceName),lwd=2,col=4)
 
 
 #These are the super-enhancers
@@ -136,24 +136,24 @@ plotFileName = paste(outFolder,enhancerName,'_Plot_points.png',sep='')
 png(filename=plotFileName,height=600,width=600)
 signalOrder = order(rankBy_vector,decreasing=TRUE)
 if(wceName == 'NONE'){
-	plot(length(rankBy_vector):1,rankBy_vector[signalOrder], col='red',xlab=paste(rankBy_factor,'_enhancers'),ylab=paste(rankBy_factor,' Signal'),pch=19,cex=2)	
+	plot(length(rankBy_vector):1,rankBy_vector[signalOrder], col='red',xlab=paste(rankBy_factor,' Stitched peaks'),ylab=paste(rankBy_factor,' Signal'),pch=19,cex=2)	
 
 }else{
-	plot(length(rankBy_vector):1,rankBy_vector[signalOrder], col='red',xlab=paste(rankBy_factor,'_enhancers'),ylab=paste(rankBy_factor,' Signal','- ',wceName),pch=19,cex=2)
+	plot(length(rankBy_vector):1,rankBy_vector[signalOrder], col='red',xlab=paste(rankBy_factor,' Stitched peaks'),ylab=paste(rankBy_factor,' Signal','- ',wceName),pch=19,cex=2)
 }
 abline(h=cutoff_options$absolute,col='grey',lty=2)
 abline(v=length(rankBy_vector)-length(superEnhancerRows),col='grey',lty=2)
 lines(length(rankBy_vector):1,rankBy_vector[signalOrder],lwd=4, col='red')
-text(0,0.8*max(rankBy_vector),paste(' Cutoff used: ',cutoff_options$absolute,'\n','Super-Enhancers identified: ',length(superEnhancerRows)),pos=4)
+text(0,0.8*max(rankBy_vector),paste(' Cutoff used: ',cutoff_options$absolute,'\n','Super-Stitched peaks identified: ',length(superEnhancerRows)),pos=4)
 
 dev.off()
 
 
 
 
 #Writing a bed file
-bedFileName = paste(outFolder,enhancerName,'_Enhancers_withSuper.bed',sep='')
-convert_stitched_to_bed(stitched_regions,paste(rankBy_factor,"Enhancers"), enhancerDescription,bedFileName,score=rankBy_vector,splitSuper=TRUE,superRows= superEnhancerRows,baseColor="0,0,0",superColor="255,0,0")
+bedFileName = paste(outFolder,enhancerName,'_Stitched_withSuper.bed',sep='')
+convert_stitched_to_bed(stitched_regions,paste(rankBy_factor,"Stitched"), enhancerDescription,bedFileName,score=rankBy_vector,splitSuper=TRUE,superRows= superEnhancerRows,baseColor="0,0,0",superColor="255,0,0")
 
 
 
@@ -168,8 +168,8 @@ additionalTableData[superEnhancerRows,2] <- 1
 
 
 #Writing enhancer and super-enhancer tables with enhancers ranked and super status annotated
-enhancerTableFile = paste(outFolder,enhancerName,'_AllEnhancers.table.txt',sep='')
+enhancerTableFile = paste(outFolder,enhancerName,'_AllStitched.table.txt',sep='')
 writeSuperEnhancer_table(stitched_regions, enhancerDescription,enhancerTableFile, additionalData= additionalTableData)
 
-superTableFile = paste(outFolder,enhancerName,'_SuperEnhancers.table.txt',sep='')
+superTableFile = paste(outFolder,enhancerName,'_SuperStitched.table.txt',sep='')
 writeSuperEnhancer_table(true_super_enhancers, enhancerDescription,superTableFile, additionalData= additionalTableData[superEnhancerRows,])
diff --git a/bin/ROSE_geneMapper.py b/bin/ROSE_geneMapper.py
@@ -217,6 +217,8 @@ def main():
                       help = "Enter a ROSE ranked enhancer or super-enhancer file")
     parser.add_option("-g","--genome", dest="genome",nargs = 1, default=None,
                       help = "Enter the genome build (MM9,MM8,HG18,HG19,HG38)")
+    parser.add_option("--custom", dest="custom_genome", default=None,
+                      help = "Enter the custom genome annotation .ucsc")
 
     #optional flags
     parser.add_option("-l","--list", dest="geneList",nargs = 1, default=None,
@@ -230,7 +232,7 @@ def main():
     (options,args) = parser.parse_args()
 
 
-    if not options.input or not options.genome:
+    if not options.input or not (options.genome or options.custom_genome):
 
         parser.print_help()
         exit()
@@ -245,11 +247,6 @@ def main():
         outFolder = '/'.join(enhancerFile.split('/')[0:-1]) + '/'
 
 
-    #GETTING THE GENOME
-    genome = options.genome
-    print(('USING %s AS THE GENOME' % genome))
-
-
     #GETTING THE CORRECT ANNOT FILE
     cwd = os.getcwd()
     genomeDict = {
@@ -261,7 +258,14 @@ def main():
         'MM10':'%s/annotation/mm10_refseq.ucsc' % (cwd),
         }
 
-    annotFile = genomeDict[genome.upper()]
+    #GETTING THE GENOME
+    if options.custom_genome:
+        annotFile = options.custom_genome
+        print('USING CUSTOM GENOME %s AS THE GENOME FILE' % options.custom_genome)
+    else:
+        genome = options.genome
+        annotFile = genomeDict[genome.upper()]
+        print('USING %s AS THE GENOME' % genome)
 
     #GETTING THE TRANSCRIBED LIST
     if options.geneList:
@@ -276,11 +280,11 @@ def main():
     enhancerFileName = enhancerFile.split('/')[-1].split('.')[0]
 
     #writing the enhancer table
-    out1 = '%s%s_ENHANCER_TO_GENE.txt' % (outFolder,enhancerFileName)
+    out1 = '%s%s_REGION_TO_GENE.txt' % (outFolder,enhancerFileName)
     ROSE_utils.unParseTable(enhancerToGeneTable,out1,'\t')
 
     #writing the gene table
-    out2 = '%s%s_GENE_TO_ENHANCER.txt' % (outFolder,enhancerFileName)
+    out2 = '%s%s_GENE_TO_REGION.txt' % (outFolder,enhancerFileName)
     ROSE_utils.unParseTable(geneToEnhancerTable,out2,'\t')
 
 if __name__ == "__main__":

diff --git a/bin/ROSE_main.py b/bin/ROSE_main.py
@@ -226,8 +226,10 @@ def main():
                       help = "bamfile to rank enhancer by")
     parser.add_option("-o","--out", dest="out",nargs = 1, default=None,
                       help = "Enter an output folder")
-    parser.add_option("-g","--genome", dest="genome",nargs = 1, default=None,
+    parser.add_option("-g","--genome", dest="genome", default=None,
                       help = "Enter the genome build (MM9,MM8,HG18,HG19,HG38)")
+    parser.add_option("--custom", dest="custom_genome", default=None,
+                      help = "Enter the custom genome annotation .ucsc")
 
     #optional flags
     parser.add_option("-b","--bams", dest="bams",nargs = 1, default=None,
@@ -246,7 +248,7 @@ def main():
     (options,args) = parser.parse_args()
 
 
-    if not options.input or not options.rankby or not options.out or not options.genome:
+    if not options.input or not options.rankby or not options.out or not (options.genome or options.custom_genome):
         print('hi there')
         parser.print_help()
         exit()
@@ -304,12 +306,6 @@ def main():
     print(('USING %s AS THE INPUT GFF' % (inputGFFFile)))
     inputName = inputGFFFile.split('/')[-1].split('.')[0]
 
-
-    #GETTING THE GENOME
-    genome = options.genome
-    print(('USING %s AS THE GENOME' % genome))
-
-
     #GETTING THE CORRECT ANNOT FILE
     cwd = os.getcwd()
     genomeDict = {
@@ -321,7 +317,15 @@ def main():
         'MM10':'%s/annotation/mm10_refseq.ucsc' % (cwd),
         }
 
-    annotFile = genomeDict[genome.upper()]
+    #GETTING THE GENOME
+    if options.custom_genome:
+        annotFile = options.custom_genome
+        print('USING CUSTOM GENOME %s AS THE GENOME FILE' % options.custom_genome)
+    else:
+        genome = options.genome
+        annotFile = genomeDict[genome.upper()]
+        print('USING %s AS THE GENOME' % genome)
+
 
     #MAKING THE START DICT
     print('MAKING START DICT')
@@ -365,7 +369,7 @@ def main():
 
 
     #SETTING UP THE OVERALL OUTPUT FILE
-    outputFile1 = outFolder + stitchedGFFName + '_ENHANCER_REGION_MAP.txt'
+    outputFile1 = outFolder + stitchedGFFName + '_REGION_MAP.txt'
 
     print(('OUTPUT WILL BE WRITTEN TO  %s' % (outputFile1)))
 
@@ -470,15 +474,23 @@ def main():
 
     #calling the gene mapper
     time.sleep(60)
-    superTableFile = "%s/%s_SuperEnhancers.table.txt" % (outFolder,inputName)
-    cmd = "ROSE_geneMapper.py -g %s -i %s -r TRUE" % (genome,superTableFile)
-    print(cmd)
-    os.system(cmd)
+    superTableFile = "%s/%s_SuperStitched.table.txt" % (outFolder,inputName)
+    allTableFile = "%s/%s_AllStitched.table.txt" % (outFolder,inputName)
+
+    if options.custom_genome:
+        cmd1 = "ROSE_geneMapper.py --custom %s -i %s -r TRUE" % (options.custom_genome,superTableFile)
+        cmd2 = "ROSE_geneMapper.py --custom %s -i %s -r TRUE" % (options.custom_genome,allTableFile)
+    else:
+        cmd1 = "ROSE_geneMapper.py -g %s -i %s -r TRUE" % (genome,superTableFile)
+        cmd2 = "ROSE_geneMapper.py -g %s -i %s -r TRUE" % (genome,allTableFile)
+
+    #gene mapper for superenhancers
+    print(cmd1)
+    os.system(cmd1)
 
-    allTableFile = "%s/%s_AllEnhancers.table.txt" % (outFolder,inputName)
-    cmd = "ROSE_geneMapper.py -g %s -i %s -r TRUE" % (genome,allTableFile)
-    print(cmd)
-    os.system(cmd)
+    #gene mapper for enhancers
+    print(cmd2)
+    os.system(cmd2)
 
 if __name__ == "__main__":
     main()