From 1c205a65f20997e44f5a9e89113b2785eedf6ee2 Mon Sep 17 00:00:00 2001 From: Adetunji Date: Tue, 7 Apr 2020 13:04:57 -0500 Subject: [PATCH] removed wrapper --- README.md | 46 ++++++++++++++++-------- ROSE-local.sh | 13 +++---- bin/ROSE-call.sh | 91 ------------------------------------------------ 3 files changed, 39 insertions(+), 111 deletions(-) delete mode 100755 bin/ROSE-call.sh diff --git a/README.md b/README.md index 9750677..e528b09 100755 --- a/README.md +++ b/README.md @@ -8,32 +8,50 @@ CLONED using SOURCETREE from: https://bitbucket.org/young_computation/rose/src/m #### === Changelog 1. USAGE - - Option 1: To run the program locally and independent of software location by calling ROSE-local.sh - NB: First open ROSE-local.sh and modify PATHTO with the PATH ROSE is installed in. - > ROSE-local.sh ["GTF file"] ["BAM file"] ["OutputDir"] ["feature type"] ["species"] ["bed fileA"] ["bed fileB"] - - - Option 2: Add ROSE to user executable $PATH - ```bash - PATHTO=/path/to/ROSE - PYTHONPATH=$PATHTO/lib - export PYTHONPATH - export PATH=$PATH:$PATHTO/bin - ``` + ```bash + PATHTO=/path/to/ROSE + PYTHONPATH=$PATHTO/lib + export PYTHONPATH + export PATH=$PATH:$PATHTO/bin + + ROSE_main.py [options] -g [GENOME] -i [INPUT_REGION_GFF] -r [RANKBY_BAM_FILE] -o [OUTPUT_FOLDER] [OPTIONAL_FLAGS] + ``` 1. Update: * ROSE is executable independent of software directory location. - * ROSE has a wrapper script "ROSE-local.sh" to successfully execute all steps of the package, else add ROSE to user executable $PATH * ROSE is compatible with python3 +1. REQUIREMENTS: + + 1. All files : + All input files must be in one directory. + + 1. Annotation file : + Annotation file should be in UCSC table track format (https://genome.ucsc.edu/cgi-bin/hgTables). + Annotation file should be saved as [GENOME]_refseq.ucsc (example: hg19_refseq.ucsc). + Annotation file should be in annotation/ folder in the input files directory. + + 1. 
BAM files (of sequencing reads for factor of interest and control) : + Files must have chromosome IDs starting with "chr" + Files must be sorted and indexed using SAMtools in order for bamToGFF.py to work. (http://samtools.sourceforge.net/samtools.shtml) + + 1. Peak file of constituent enhancers : + File must be in GFF format with the following columns: + 1: chromosome (chr#) + 2: unique ID for each constituent enhancer region + 4: start of constituent + 5: end of constituent + 7: strand (+,-,.) + 9: unique ID for each constituent enhancer region + NOTE: if value for column 2 and 9 differ, value in column 2 will be used + 1. DIRECTORY structure ``` ├── LICENSE.txt │ ├── README.md │ - ├── ROSE-local.sh : bash wrapper - │ ├── bin │   ├── ROSE_bamToGFF.py : calculates density of .bam reads in .gff regions │   ├── ROSE_callSuper.R : ranks regions by their densities, creates cutoff diff --git a/ROSE-local.sh b/ROSE-local.sh index decb73b..cab5781 100755 --- a/ROSE-local.sh +++ b/ROSE-local.sh @@ -1,7 +1,7 @@ #!/bin/bash # # Rose Caller to detect both Enhancers and Super-Enhancers -# +# Hardcoded implementation of ROSE for St. Jude, Abraham's lab. # Version 1 11/16/2019 ############################################################## @@ -38,7 +38,7 @@ FEATURE=${FEATURE:=gene} # Species SPECIES=$5 -SPECIES=${SPECIES:=hg_19} +SPECIES=${SPECIES:=hg19} # Bed File A FILEA=$6 @@ -67,7 +67,8 @@ echo "Species: $SPECIES" echo "Feature type: $FEATURE" #================================================================================ # -# GENERATING UCSC REFSEQ FILE +# UCSC TRACK FORMAT ANNOTATION FILE +# Generate UCSC table track annotation file using NCBI GTF refseq. 
# mkdir -p annotation echo -e "#bin\tname\tchrom\tstrand\ttxStart\ttxEnd\tcdsStart\tcdsEnd\tX\tX\tX\t\tX\tname2" > annotation/$SPECIES"_refseq.ucsc" @@ -85,14 +86,14 @@ fi echo "Annotation file: "$SPECIES"_refseq.ucsc" # -# GENERATING MERGE BED FILES -# +# INPUT CONSTITUENT FILE +# merge peak bed files generated from MACS1 "keep_dup=all" and "keep_dup=auto" to generate constituent enhancers. cat $FILEA $FILEB | sort -k1,1 -k2,2n | mergeBed -i - | awk -F\\t '{print $1 "\t" NR "\t\t" $2 "\t" $3 "\t\t.\t\t" NR}' > unionpeaks.gff echo "Merge Bed file: unionpeaks.gff" echo # -# ROSE CALLER +# ROSE # ROSE_main.py -s $STITCH -t $TSS -g $SPECIES -i unionpeaks.gff -r $BAMFILE -o $OUTPUTDIR diff --git a/bin/ROSE-call.sh b/bin/ROSE-call.sh deleted file mode 100755 index d559212..0000000 --- a/bin/ROSE-call.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/bin/bash -# -# Rose Caller to detect both Enhancers and Super-Enhancers -# -# Version 1 11/16/2019 -# Updated: 1/9/2020 - -if [ $# -lt 7 ]; then - echo "" - echo 1>&2 Usage: $0 ["GTF file"] ["BAM file"] ["OutputDir"] ["feature type"] ["species"] ["bed fileA"] ["bed fileB"] - echo "" - exit 1 -fi - -#================================================================================ -#Parameters for running - -# GTF files -GTFFILE=$1 - -# BAM file -BAMFILE=$2 - -# Output Directory -OUTPUTDIR=$3 -OUTPUTDIR=${OUTPUTDIR:=ROSE_out} - -# Feature type -FEATURE=$4 -FEATURE=${FEATURE:=gene} - -# Species -SPECIES=$5 -SPECIES=${SPECIES:=hg_19} - -# Bed File A -FILEA=$6 - -# Bed File B -FILEB=$7 - -# Transcription Start Size Window -#TSS= -TSS=${TSS:=2000} - -# Maximum linking distance for stitching -#STITCH= -STITCH=${STITCH:=12500} - - -echo "#############################################" -echo "###### ROSE v1 ######" -echo "#############################################" - -echo "Input Bed File A: $FILEA" -echo "Input Bed File B: $FILEB" -echo "BAM file: $BAMFILE" -echo "Output directory: $OUTPUTDIR" -echo "Species: $SPECIES" -echo "Feature type: 
$FEATURE" -#================================================================================ -# -# GENERATING UCSC REFSEQ FILE -# -mkdir -p annotation -echo -e "#bin\tname\tchrom\tstrand\ttxStart\ttxEnd\tcdsStart\tcdsEnd\tX\tX\tX\t\tX\tname2" > annotation/$SPECIES"_refseq.ucsc" - -if [[ $FEATURE == "gene" ]]; then -awk -F'[\t ]' '{ - if($3=="gene") - print "0\t" $14 "\tchr" $1 "\t" $7 "\t" $4 "\t" $5 "\t" $4 "\t" $5 "\t.\t.\t.\t.\t" $18}' $GTFFILE | sed s/\"//g >> annotation/$SPECIES"_refseq.ucsc" - -elif [[ $FEATURE == "transcript" ]]; then -awk -F'[\t ]' '{ - if($3=="transcript") - print "0\t" $14 "\tchr" $1 "\t" $7 "\t" $4 "\t" $5 "\t" $4 "\t" $5 "\t.\t.\t.\t.\t" $18}' $GTFFILE | sed s/\"//g >> annotation/$SPECIES"_refseq.ucsc" -fi -echo "Annotation file: "$SPECIES"_refseq.ucsc" - -# -# GENERATING MERGE BED FILES -# -cat $FILEA $FILEB | sort -k1,1 -k2,2n | mergeBed -i - | awk -F\\t '{print $1 "\t" NR "\t\t" $2 "\t" $3 "\t\t.\t\t" NR}' > unionpeaks.gff -echo "Merge Bed file: unionpeaks.gff" -echo - -# -# ROSE CALLER -# -ROSE_main.py -s $STITCH -t $TSS -g $SPECIES -i unionpeaks.gff -r $BAMFILE -o $OUTPUTDIR -echo "Done!"