From 1c205a65f20997e44f5a9e89113b2785eedf6ee2 Mon Sep 17 00:00:00 2001
From: Adetunji <madetunj@splprhpc06.cm.cluster>
Date: Tue, 7 Apr 2020 13:04:57 -0500
Subject: [PATCH] removed wrapper

---
 README.md        | 46 ++++++++++++++++--------
 ROSE-local.sh    | 13 +++----
 bin/ROSE-call.sh | 91 ------------------------------------------------
 3 files changed, 39 insertions(+), 111 deletions(-)
 delete mode 100755 bin/ROSE-call.sh

diff --git a/README.md b/README.md
index 9750677..e528b09 100755
--- a/README.md
+++ b/README.md
@@ -8,32 +8,50 @@ CLONED using SOURCETREE from: https://bitbucket.org/young_computation/rose/src/m
 #### === Changelog
 1. USAGE
 
-	- Option 1: To run the program locally and independent of software location by calling ROSE-local.sh
-		NB: First open ROSE-local.sh and modify PATHTO with the PATH ROSE is installed in.
-		> ROSE-local.sh ["GTF file"] ["BAM file"] ["OutputDir"] ["feature type"] ["species"] ["bed fileA"] ["bed fileB"]
-
-	- Option 2: Add ROSE to user executable $PATH
-		```bash
-		PATHTO=/path/to/ROSE
-		PYTHONPATH=$PATHTO/lib
-		export PYTHONPATH
-		export PATH=$PATH:$PATHTO/bin
-		```
+	```bash
+	PATHTO=/path/to/ROSE
+	PYTHONPATH=$PATHTO/lib
+	export PYTHONPATH
+	export PATH=$PATH:$PATHTO/bin
+
+	ROSE_main.py [options] -g [GENOME] -i [INPUT_REGION_GFF] -r [RANKBY_BAM_FILE] -o [OUTPUT_FOLDER] [OPTIONAL_FLAGS]
+	```
 
 1. Update: 
 
 	* ROSE is executable independent of software directory location.
-	* ROSE has a wrapper script "ROSE-local.sh" to successfully execute all steps of the package, else add ROSE to user executable $PATH
 	* ROSE is compatible with python3
 
+1. REQUIREMENTS:
+
+	1. All files :
+	All input files must be in one directory.
+
+	1. Annotation file :
+	Annotation file should be in UCSC table track format (https://genome.ucsc.edu/cgi-bin/hgTables).
+	Annotation file should be saved as [GENOME]_refseq.ucsc (example: hg19_refseq.ucsc).
+	Annotation file should be in annotation/ folder in the input files directory.
+
+	1. BAM files (of sequencing reads for factor of interest and control) :
+	Files must have chromosome IDs starting with "chr"
+	Files must be sorted and indexed using SAMtools in order for ROSE_bamToGFF.py to work. (http://samtools.sourceforge.net/samtools.shtml)
+
+	1. Peak file of constituent enhancers :
+	File must be in GFF format with the following columns:
+		1: chromosome (chr#)
+		2: unique ID for each constituent enhancer region
+		4: start of constituent
+		5: end of constituent
+		7: strand (+,-,.)
+		9: unique ID for each constituent enhancer region
+		NOTE: if the values in columns 2 and 9 differ, the value in column 2 will be used
+
 1. DIRECTORY structure
 	```
 	├── LICENSE.txt
 	│
 	├── README.md
 	│
-	├── ROSE-local.sh : bash wrapper
-	│
 	├── bin
 	│   ├── ROSE_bamToGFF.py : calculates density of .bam reads in .gff regions
 	│   ├── ROSE_callSuper.R : ranks regions by their densities, creates cutoff
diff --git a/ROSE-local.sh b/ROSE-local.sh
index decb73b..cab5781 100755
--- a/ROSE-local.sh
+++ b/ROSE-local.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 # Rose Caller to detect both Enhancers and Super-Enhancers
-#
+# Hardcoded implementation of ROSE for St. Jude, Abraham's lab.
 # Version 1 11/16/2019
 
 ##############################################################
@@ -38,7 +38,7 @@ FEATURE=${FEATURE:=gene}
 
 # Species
 SPECIES=$5
-SPECIES=${SPECIES:=hg_19}
+SPECIES=${SPECIES:=hg19}
 
 # Bed File A
 FILEA=$6
@@ -67,7 +67,8 @@ echo "Species: $SPECIES"
 echo "Feature type: $FEATURE"
 #================================================================================
 # 
-# GENERATING UCSC REFSEQ FILE
+# UCSC TRACK FORMAT ANNOTATION FILE
+# Generate UCSC table track annotation file using NCBI GTF refseq.
 #
 mkdir -p annotation
 echo -e "#bin\tname\tchrom\tstrand\ttxStart\ttxEnd\tcdsStart\tcdsEnd\tX\tX\tX\t\tX\tname2" > annotation/$SPECIES"_refseq.ucsc"
@@ -85,14 +86,14 @@ fi
 echo "Annotation file: "$SPECIES"_refseq.ucsc"
 
 #
-# GENERATING MERGE BED FILES
-#
+# INPUT CONSTITUENT FILE
+# Merge the peak BED files generated by MACS1 with "keep_dup=all" and "keep_dup=auto" to generate constituent enhancers.
 cat $FILEA $FILEB | sort -k1,1 -k2,2n | mergeBed -i - | awk -F\\t '{print $1 "\t" NR "\t\t" $2 "\t" $3 "\t\t.\t\t" NR}' > unionpeaks.gff
 echo "Merge Bed file: unionpeaks.gff"
 echo
 
 #
-# ROSE CALLER
+# ROSE
 #
 ROSE_main.py -s $STITCH -t $TSS -g $SPECIES -i unionpeaks.gff -r $BAMFILE -o $OUTPUTDIR
 
diff --git a/bin/ROSE-call.sh b/bin/ROSE-call.sh
deleted file mode 100755
index d559212..0000000
--- a/bin/ROSE-call.sh
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/bin/bash
-#
-# Rose Caller to detect both Enhancers and Super-Enhancers
-#
-# Version 1 11/16/2019
-# Updated: 1/9/2020
-
-if [ $# -lt 7 ]; then
-  echo ""
-  echo 1>&2 Usage: $0 ["GTF file"] ["BAM file"] ["OutputDir"] ["feature type"] ["species"] ["bed fileA"] ["bed fileB"]
-  echo ""
-  exit 1
-fi
-
-#================================================================================
-#Parameters for running
-
-# GTF files
-GTFFILE=$1
-
-# BAM file
-BAMFILE=$2
-
-# Output Directory
-OUTPUTDIR=$3
-OUTPUTDIR=${OUTPUTDIR:=ROSE_out}
-
-# Feature type
-FEATURE=$4
-FEATURE=${FEATURE:=gene}
-
-# Species
-SPECIES=$5
-SPECIES=${SPECIES:=hg_19}
-
-# Bed File A
-FILEA=$6
-
-# Bed File B
-FILEB=$7
-
-# Transcription Start Size Window
-#TSS=
-TSS=${TSS:=2000}
-
-# Maximum linking distance for stitching
-#STITCH=
-STITCH=${STITCH:=12500}
-
-
-echo "#############################################"
-echo "######             ROSE v1             ######"
-echo "#############################################"
-
-echo "Input Bed File A: $FILEA"
-echo "Input Bed File B: $FILEB"
-echo "BAM file: $BAMFILE"
-echo "Output directory: $OUTPUTDIR"
-echo "Species: $SPECIES"
-echo "Feature type: $FEATURE"
-#================================================================================
-# 
-# GENERATING UCSC REFSEQ FILE
-#
-mkdir -p annotation
-echo -e "#bin\tname\tchrom\tstrand\ttxStart\ttxEnd\tcdsStart\tcdsEnd\tX\tX\tX\t\tX\tname2" > annotation/$SPECIES"_refseq.ucsc"
-
-if [[ $FEATURE == "gene" ]]; then
-awk -F'[\t ]' '{
-  if($3=="gene")
-    print "0\t" $14 "\tchr" $1 "\t" $7 "\t" $4 "\t" $5 "\t" $4 "\t" $5 "\t.\t.\t.\t.\t" $18}' $GTFFILE | sed s/\"//g >> annotation/$SPECIES"_refseq.ucsc"
-
-elif [[ $FEATURE == "transcript" ]]; then
-awk -F'[\t ]' '{
-  if($3=="transcript")
-    print "0\t" $14 "\tchr" $1 "\t" $7 "\t" $4 "\t" $5 "\t" $4 "\t" $5 "\t.\t.\t.\t.\t" $18}' $GTFFILE | sed s/\"//g >> annotation/$SPECIES"_refseq.ucsc"
-fi
-echo "Annotation file: "$SPECIES"_refseq.ucsc"
-
-#
-# GENERATING MERGE BED FILES
-#
-cat $FILEA $FILEB | sort -k1,1 -k2,2n | mergeBed -i - | awk -F\\t '{print $1 "\t" NR "\t\t" $2 "\t" $3 "\t\t.\t\t" NR}' > unionpeaks.gff
-echo "Merge Bed file: unionpeaks.gff"
-echo
-
-#
-# ROSE CALLER
-#
-ROSE_main.py -s $STITCH -t $TSS -g $SPECIES -i unionpeaks.gff -r $BAMFILE -o $OUTPUTDIR
-echo "Done!"