From be764e1facacd59592c3f87c3be5dee25f739be0 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Tue, 8 Aug 2017 17:46:24 +1000 Subject: [PATCH 01/30] first draft at Travis CI set up --- .travis.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..6678037 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,28 @@ +language: r +# r: +# - oldrel +# - release +# - devel +cache: packages +install: + - git clone git@github.com:Oshlack/STRetch.git STRetch + - cd STRetch + - ./install.sh +# R +# > install.packages(c('optparse','plyr','dplyr','tidyr','reshape2')) +# > q() +# Download the test data + - mkdir test + - cd test/ + - wget -O testdata.zip https://ndownloader.figshare.com/articles/4762489?private_link=cc7347f4637d9a7fe22d + - unzip testdata.zip + - rm testdata.zip +Edit pipeline_config file to point to the exome target: + + - cat ../pipelines/pipeline_config.groovy # or use the editor of your choice +# EXOME_TARGET="SCA8_region.bed" + +# command to run tests +script: +# Run the test data +- ../tools/bin/bpipe run ../pipelines/STRetch_exome_pipeline.groovy *.fastq.gz From b3224e95a28a8994545b9a5e061333f036d345f6 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Tue, 8 Aug 2017 17:53:32 +1000 Subject: [PATCH 02/30] remove clone step --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6678037..6a2932f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,8 +5,8 @@ language: r # - devel cache: packages install: - - git clone git@github.com:Oshlack/STRetch.git STRetch - - cd STRetch +# - git clone git@github.com:Oshlack/STRetch.git STRetch +# - cd STRetch - ./install.sh # R # > install.packages(c('optparse','plyr','dplyr','tidyr','reshape2')) From 47225186629e02b67a23849852aaf64ccf210238 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 9 Aug 2017 19:04:09 +1000 Subject: [PATCH 03/30] R packages --- .travis.yml | 6 +++--- install-packages.R | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 install-packages.R diff --git a/.travis.yml b/.travis.yml index 6a2932f..09256f9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,10 +7,10 @@ cache: packages install: # - git clone git@github.com:Oshlack/STRetch.git STRetch # - cd STRetch + - pwd - ./install.sh -# R -# > install.packages(c('optparse','plyr','dplyr','tidyr','reshape2')) -# > q() +# Install R packages + - R CMD BATCH install-packages.R # Download the test data - mkdir test - cd test/ diff --git a/install-packages.R b/install-packages.R new file mode 100644 index 0000000..e2bbee5 --- /dev/null +++ b/install-packages.R @@ -0,0 +1,4 @@ +## Create the personal library if it doesn't exist. Ignore a warning if the directory already exists. +dir.create(Sys.getenv("R_LIBS_USER"), showWarnings = FALSE, recursive = TRUE) +## Install packages +install.packages(c('optparse','plyr','dplyr','tidyr','reshape2')) From 7f4f6afd8cc81f31a7309bb1968996b703ef73c5 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 9 Aug 2017 19:09:15 +1000 Subject: [PATCH 04/30] Make install.sh executable --- install.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 install.sh diff --git a/install.sh b/install.sh old mode 100644 new mode 100755 From 3179a1b252503a6fa9460f6f23c32e42746c7fa5 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Mon, 14 Aug 2017 14:21:02 +1000 Subject: [PATCH 05/30] check disk space --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 09256f9..712f25f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ install: # - git clone git@github.com:Oshlack/STRetch.git STRetch # - cd STRetch - pwd + - df -h - ./install.sh # Install R packages - R CMD BATCH install-packages.R From 0b7991fee43449fd05fd7cbb896b80b5b1f46b30 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Tue, 29 Aug 2017 17:08:12 +1000 Subject: [PATCH 06/30] smaller reference and more data for testing --- .travis.yml | 7 +-- install-ci.sh | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 5 deletions(-) create mode 100755 install-ci.sh diff --git a/.travis.yml b/.travis.yml index 712f25f..1fd46c4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,15 +9,12 @@ install: # - cd STRetch - pwd - df -h - - ./install.sh + - ./install-ci.sh # Install R packages - R CMD BATCH install-packages.R # Download the test data - mkdir test - cd test/ - - wget -O testdata.zip https://ndownloader.figshare.com/articles/4762489?private_link=cc7347f4637d9a7fe22d - - unzip testdata.zip - - rm testdata.zip Edit pipeline_config file to point to the exome target: - cat ../pipelines/pipeline_config.groovy # or use the editor of your choice @@ -26,4 +23,4 @@ Edit pipeline_config file to point to the exome target: # command to run tests script: # Run the test data -- ../tools/bin/bpipe run ../pipelines/STRetch_exome_pipeline.groovy *.fastq.gz +- ../tools/bin/bpipe run ../pipelines/STRetch_exome_pipeline.groovy ../test-data/*.fastq.gz diff --git a/install-ci.sh b/install-ci.sh new file mode 100755 index 0000000..376eda7 --- /dev/null +++ b/install-ci.sh @@ -0,0 +1,158 @@ +#!/bin/bash + +## This script will install the tools required for the STRetch pipeline. +## It will fetched each tool from the web and placed into the tools/ subdirectory. +## Paths to all installed tools can be found in the file tools.groovy at the +## end of execution of this script. These paths can be changed if a different +## version of software is required. Note that R must be installed manually +## + +installdir=$PWD +refdir=$PWD/reference-data +toolspec=$PWD/pipelines/pipeline_config.groovy +template=$PWD/pipelines/config-examples/pipeline_config_template.groovy + +mkdir -p tools/bin +cd tools + +#a list of which programs need to be installed +commands="bpipe python goleft bedtools bwa samtools" + +#installation method +function bpipe_install { + wget -O bpipe-0.9.9.2.tar.gz https://github.com/ssadedin/bpipe/releases/download/0.9.9.2/bpipe-0.9.9.2.tar.gz + tar -zxvf bpipe-0.9.9.2.tar.gz ; rm bpipe-0.9.9.2.tar.gz + ln -s $PWD/bpipe-0.9.9.2/bin/* $PWD/bin/ +} + +# Installs miniconda, Python 3 + required packages, BedTools and goleft +# (and any other dependancies listed in environment.yml) +function python_install { + wget -O miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh + bash miniconda.sh -b -p $PWD/miniconda + rm miniconda.sh + $PWD/miniconda/bin/conda env create -f ../environment.yml + ln -s $PWD/miniconda/envs/STR/bin/* $PWD/bin/ +# source activate STR +} + +function bwa_install { + wget -O bwakit-0.7.15_x64-linux.tar.bz2 --no-check-certificate https://github.com/lh3/bwa/releases/download/v0.7.15/bwakit-0.7.15_x64-linux.tar.bz2 + tar -jxvf bwakit-0.7.15_x64-linux.tar.bz2 + rm bwakit-0.7.15_x64-linux.tar.bz2 + ln -s $PWD/bwa.kit/* $PWD/bin/ +} + +function samtools_install { + wget --no-check-certificate https://sourceforge.net/projects/samtools/files/samtools/1.3.1/samtools-1.3.1.tar.bz2 + tar -jxvf samtools-1.3.1.tar.bz2 + rm samtools-1.3.1.tar.bz2 + make prefix=$PWD install -C samtools-1.3.1/ +} + +function download { + wget --no-check-certificate -O $refdir/reference-data.zip https://ndownloader.figshare.com/articles/5353399?private_link=be9bde235448e937e468 + unzip $refdir/reference-data.zip -d $refdir + rm $refdir/reference-data.zip + + mkdir test-data + mv $refdir/*.gz $refdir/*.bam $refdir/*.bai test-data +} + +#populate toolspec +echo "// Bpipe pipeline config file" > $toolspec +echo "// Paths are relative to the directory the pipeline is running in, so absolute" >> $toolspec +echo "// paths are recommended." >> $toolspec +echo >> $toolspec +echo "// Adjust parameters" >> $toolspec +echo "PLATFORM='illumina'" >> $toolspec +echo >> $toolspec +echo "// Number of threads to use for BWA" >> $toolspec +echo "threads=8" >> $toolspec +echo >> $toolspec +echo "// For exome pipeline only ***Edit before running the exome pipeline***" >> $toolspec +echo "EXOME_TARGET=\"path/to/exome_target_regions.bed\"" >> $toolspec +echo >> $toolspec + +#set STRetch base directory +echo "// STRetch installation location" >> $toolspec +echo "STRETCH=\"$installdir\"" >> $toolspec +echo >> $toolspec + +echo "// Paths to tools used by the pipeline" >> $toolspec + +for c in $commands ; do + c_path=`which $PWD/bin/$c 2>/dev/null` + if [ -z $c_path ] ; then + echo "$c not found, fetching it" + ${c}_install + c_path=`which $PWD/bin/$c 2>/dev/null` + fi + echo "$c=\"$c_path\"" >> $toolspec +done + +if [ ! -f $refdir/*dedup.sorted.bed ] ; then + mkdir -p $refdir + echo "Downloading reference and test data" + download +fi + +echo >> $toolspec +echo "// Path to reference data" >> $toolspec +echo "refdir=\"$refdir\"" >> $toolspec + +echo >> $toolspec +echo "// Decoy reference assumed to have matching .genome file in the same directory" >> $toolspec +echo "REF=\"$refdir/hg19.STRdecoys.sorted.fasta\"" >> $toolspec +echo "STR_BED=\"$refdir/hg19.simpleRepeat_period1-6_dedup.sorted.bed\"" >> $toolspec +echo "DECOY_BED=\"$refdir/STRdecoys.sorted.bed\"" >> $toolspec +echo "// By default, uses other samples in the same batch as a control" >> $toolspec +echo "CONTROL=\"\"" >> $toolspec +echo "// Uncomment the line below to use a set of WGS samples as controls, or specify your own" >> $toolspec +echo "//CONTROL=\"$refdir/PCRfreeWGS.controls.tsv\"" >> $toolspec +echo >> $toolspec + + +#loop through commands to check they are all installed +echo "**********************************************************" +echo "Checking that all required tools were installed:" +Final_message="All commands installed successfully!" +for c in $commands ; do + c_path=`which $PWD/bin/$c 2>/dev/null` + if [ -z $c_path ] ; then + echo -n "WARNING: $c could not be found!!!! " + echo "You will need to download and install $c manually, then add its path to $toolspec" + Final_message="WARNING: One or more command did not install successfully. See warning messages above. You will need to correct this before running STRetch." + else + echo "$c looks like it has been installed" + fi +done + +echo "**********************************************************" + +#check that R is installed +R_path=`which R 2>/dev/null` +if [ -z $R_path ] ; then + echo "R not found!" + echo "Please go to http://www.r-project.org/ and follow the installation instructions." + echo "Please also install the required R packages." +else + echo "R seems to be available." + echo "Make sure you are using the correct version of R and have installed all required packages." +fi +echo "R=\"$R_path\"" >> $toolspec + +echo "**********************************************************" + +#check for reference data +if [ ! -f $refdir/*dedup.sorted.bed ] ; then + echo -n "WARNING: reference files could not be found!!!! " + echo "You will need to download them manually, then add the path to $toolspec" +else + echo "It looks like the reference data has been downloaded" +fi + +echo "**********************************************************" +echo $Final_message +echo "Please make sure you have installed the required R packages:" +echo "install.packages(c('optparse','plyr','dplyr','tidyr','reshape2'))" From e2c89e0d9c1d49d38c3d76ce280aa777ff71444a Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 30 Aug 2017 13:56:47 +1000 Subject: [PATCH 07/30] show warnings --- install-packages.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install-packages.R b/install-packages.R index e2bbee5..4ad8ec0 100644 --- a/install-packages.R +++ b/install-packages.R @@ -1,4 +1,4 @@ ## Create the personal library if it doesn't exist. Ignore a warning if the directory already exists. -dir.create(Sys.getenv("R_LIBS_USER"), showWarnings = FALSE, recursive = TRUE) +dir.create(Sys.getenv("R_LIBS_USER"), showWarnings = TRUE, recursive = TRUE) ## Install packages install.packages(c('optparse','plyr','dplyr','tidyr','reshape2')) From 3a3e7cbcd20f72f5a74050d864a1818a46ff8946 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 30 Aug 2017 14:39:48 +1000 Subject: [PATCH 08/30] rscript verpose --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1fd46c4..a6218c5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ install: - df -h - ./install-ci.sh # Install R packages - - R CMD BATCH install-packages.R + - Rscript --verbose install-packages.R # Download the test data - mkdir test - cd test/ From b6bf0a4b2a1485297a9c1347a689b32a79ca69dd Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 30 Aug 2017 16:26:09 +1000 Subject: [PATCH 09/30] debugging --- .travis.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index a6218c5..98e3be1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,14 +5,12 @@ language: r # - devel cache: packages install: -# - git clone git@github.com:Oshlack/STRetch.git STRetch -# - cd STRetch - - pwd - - df -h - ./install-ci.sh + - ls + - ls test-data # Install R packages - Rscript --verbose install-packages.R -# Download the test data +# Create working directory - mkdir test - cd test/ Edit pipeline_config file to point to the exome target: From 1edea6932bea2b363ec67f5d603d4794fe373069 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 30 Aug 2017 16:40:46 +1000 Subject: [PATCH 10/30] debugging --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 98e3be1..7c6daf0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ cache: packages install: - ./install-ci.sh - ls - - ls test-data + - ls * # Install R packages - Rscript --verbose install-packages.R # Create working directory From 43aff8445b0376c49c63301613a1030631c9649a Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 30 Aug 2017 17:01:31 +1000 Subject: [PATCH 11/30] fix test data location --- install-ci.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/install-ci.sh b/install-ci.sh index 376eda7..0be552a 100755 --- a/install-ci.sh +++ b/install-ci.sh @@ -55,8 +55,8 @@ function download { unzip $refdir/reference-data.zip -d $refdir rm $refdir/reference-data.zip - mkdir test-data - mv $refdir/*.gz $refdir/*.bam $refdir/*.bai test-data + mkdir $installdir/test-data + mv $refdir/*.gz $refdir/*.bam $refdir/*.bai $installdir/test-data } #populate toolspec From 710ce491454ab6f185c26d38285b94ffc2d627b2 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 30 Aug 2017 17:41:39 +1000 Subject: [PATCH 12/30] set reference data locations --- .travis.yml | 1 + install-ci.sh | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7c6daf0..14a67b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,6 +12,7 @@ install: - Rscript --verbose install-packages.R # Create working directory - mkdir test + - cp test-data/SCA8_region.bed test/ - cd test/ Edit pipeline_config file to point to the exome target: diff --git a/install-ci.sh b/install-ci.sh index 0be552a..c273da1 100755 --- a/install-ci.sh +++ b/install-ci.sh @@ -71,7 +71,7 @@ echo "// Number of threads to use for BWA" >> $toolspec echo "threads=8" >> $toolspec echo >> $toolspec echo "// For exome pipeline only ***Edit before running the exome pipeline***" >> $toolspec -echo "EXOME_TARGET=\"path/to/exome_target_regions.bed\"" >> $toolspec +echo "EXOME_TARGET=\"SCA8_region.bed\"" >> $toolspec echo >> $toolspec #set STRetch base directory @@ -103,13 +103,13 @@ echo "refdir=\"$refdir\"" >> $toolspec echo >> $toolspec echo "// Decoy reference assumed to have matching .genome file in the same directory" >> $toolspec -echo "REF=\"$refdir/hg19.STRdecoys.sorted.fasta\"" >> $toolspec +echo "REF=\"$refdir/hg19.chr13.STRdecoys.sorted.fasta\"" >> $toolspec echo "STR_BED=\"$refdir/hg19.simpleRepeat_period1-6_dedup.sorted.bed\"" >> $toolspec echo "DECOY_BED=\"$refdir/STRdecoys.sorted.bed\"" >> $toolspec echo "// By default, uses other samples in the same batch as a control" >> $toolspec echo "CONTROL=\"\"" >> $toolspec echo "// Uncomment the line below to use a set of WGS samples as controls, or specify your own" >> $toolspec -echo "//CONTROL=\"$refdir/PCRfreeWGS.controls.tsv\"" >> $toolspec +echo "CONTROL=\"$refdir/PCRfreeWGS.controls.tsv\"" >> $toolspec echo >> $toolspec From 3713e0a767399af8dc2570e3499d887fc4d4cb83 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Wed, 30 Aug 2017 18:13:26 +1000 Subject: [PATCH 13/30] fix reference bed location --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 14a67b5..a8561d8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ install: - Rscript --verbose install-packages.R # Create working directory - mkdir test - - cp test-data/SCA8_region.bed test/ + - cp reference-data/SCA8_region.bed test/ - cd test/ Edit pipeline_config file to point to the exome target: From e34108634f7a9ef072f3a56593cfce9e5750a59d Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Thu, 31 Aug 2017 18:18:44 +1000 Subject: [PATCH 14/30] check disk space --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a8561d8..95eecef 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,6 @@ language: r cache: packages install: - ./install-ci.sh - - ls - ls * # Install R packages - Rscript --verbose install-packages.R @@ -14,6 +13,7 @@ install: - mkdir test - cp reference-data/SCA8_region.bed test/ - cd test/ + - df -h Edit pipeline_config file to point to the exome target: - cat ../pipelines/pipeline_config.groovy # or use the editor of your choice From d299d3b1dded764529d779c5be0a78843b0f4558 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 8 Sep 2017 13:55:55 +1000 Subject: [PATCH 15/30] check bedtools version --- .travis.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 95eecef..e99d68f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,16 +6,14 @@ language: r cache: packages install: - ./install-ci.sh - - ls * # Install R packages - Rscript --verbose install-packages.R # Create working directory - mkdir test - cp reference-data/SCA8_region.bed test/ - cd test/ - - df -h + - bedtools --version Edit pipeline_config file to point to the exome target: - - cat ../pipelines/pipeline_config.groovy # or use the editor of your choice # EXOME_TARGET="SCA8_region.bed" From cec4947c5c1bf4840faa32fd39d2329fb06e7800 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 8 Sep 2017 14:40:49 +1000 Subject: [PATCH 16/30] ls tools --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e99d68f..f726eb5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,13 +6,15 @@ language: r cache: packages install: - ./install-ci.sh + - ls * + - ls tools/* + - tools/bin/bedtools --version # Install R packages - Rscript --verbose install-packages.R # Create working directory - mkdir test - cp reference-data/SCA8_region.bed test/ - cd test/ - - bedtools --version Edit pipeline_config file to point to the exome target: - cat ../pipelines/pipeline_config.groovy # or use the editor of your choice # EXOME_TARGET="SCA8_region.bed" From bbda72702b2d8fa9a8887e35179c6f0d27aba8bd Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 8 Sep 2017 15:05:57 +1000 Subject: [PATCH 17/30] add tools/bin to path --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f726eb5..3c81809 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ install: - ./install-ci.sh - ls * - ls tools/* - - tools/bin/bedtools --version + - PATH=$PATH:$PWD/tools/bin # Install R packages - Rscript --verbose install-packages.R # Create working directory From f65f57e8976284231b66903fd76dbcbf3f3f5709 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 8 Sep 2017 15:20:48 +1000 Subject: [PATCH 18/30] check file contents after run --- .travis.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3c81809..3c725d0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,9 +6,7 @@ language: r cache: packages install: - ./install-ci.sh - - ls * - - ls tools/* - - PATH=$PATH:$PWD/tools/bin + - PATH=$PATH:$PWD/tools/bin # might not be appropriate for users to add to their PATH # Install R packages - Rscript --verbose install-packages.R # Create working directory @@ -23,3 +21,6 @@ Edit pipeline_config file to point to the exome target: script: # Run the test data - ../tools/bin/bpipe run ../pipelines/STRetch_exome_pipeline.groovy ../test-data/*.fastq.gz +after_script: +- head *.locus_counts *.STR_counts *.median_cov +- head *.tsv From d4f7c9dacd68a071d2d20e51cd476f7db463b0ed Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Thu, 14 Sep 2017 11:42:54 +1000 Subject: [PATCH 19/30] try adding bedtools dir to PATH in pipeline stage --- .travis.yml | 2 +- pipelines/pipeline_stages.groovy | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3c725d0..d219bd8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ language: r cache: packages install: - ./install-ci.sh - - PATH=$PATH:$PWD/tools/bin # might not be appropriate for users to add to their PATH + # - PATH=$PATH:$PWD/tools/bin # might not be appropriate for users to add to their PATH # Install R packages - Rscript --verbose install-packages.R # Create working directory diff --git a/pipelines/pipeline_stages.groovy b/pipelines/pipeline_stages.groovy index 8e0ec0e..0397fcf 100644 --- a/pipelines/pipeline_stages.groovy +++ b/pipelines/pipeline_stages.groovy @@ -88,6 +88,7 @@ STR_coverage = { STR_locus_counts = { transform("bam") to ("locus_counts") { exec """ + PATH=$PATH:$STRETCH/tools/bin; $python $STRETCH/scripts/identify_locus.py --bam $input.bam --bed $STR_BED From cdb19ab30a63b6cb6d2a3b69e9edf000e1435d08 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 15 Sep 2017 16:15:28 +1000 Subject: [PATCH 20/30] clean up travis script --- .travis.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index d219bd8..fad5283 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,4 @@ language: r -# r: -# - oldrel -# - release -# - devel cache: packages install: - ./install-ci.sh @@ -13,10 +9,6 @@ install: - mkdir test - cp reference-data/SCA8_region.bed test/ - cd test/ -Edit pipeline_config file to point to the exome target: - - cat ../pipelines/pipeline_config.groovy # or use the editor of your choice -# EXOME_TARGET="SCA8_region.bed" - # command to run tests script: # Run the test data From d8fd2b6b46c49e6ef8f29009dc42b50d61b2a1f6 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 15 Sep 2017 16:21:20 +1000 Subject: [PATCH 21/30] fix for libbz2 error --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 1cc8bcb..d2402ec 100644 --- a/environment.yml +++ b/environment.yml @@ -1,5 +1,6 @@ name: STR channels: +- conda-forge - bioconda dependencies: - python=3* From 09794f0b475886f4973ec15115162b6ef67ae023 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 15 Sep 2017 16:45:48 +1000 Subject: [PATCH 22/30] comment out R stage --- pipelines/STRetch_exome_pipeline.groovy | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/STRetch_exome_pipeline.groovy b/pipelines/STRetch_exome_pipeline.groovy index cf639b0..edc6187 100644 --- a/pipelines/STRetch_exome_pipeline.groovy +++ b/pipelines/STRetch_exome_pipeline.groovy @@ -16,7 +16,7 @@ run { align_bwa + index_bam + median_cov_region + STR_coverage + - STR_locus_counts - ] + - estimate_size + STR_locus_counts + ] //+ + //estimate_size } From 24862be4d345f626087ba1d412d6d483e7bc6dbe Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 15 Sep 2017 17:09:50 +1000 Subject: [PATCH 23/30] more of output files --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fad5283..00e1b69 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,5 +14,5 @@ script: # Run the test data - ../tools/bin/bpipe run ../pipelines/STRetch_exome_pipeline.groovy ../test-data/*.fastq.gz after_script: -- head *.locus_counts *.STR_counts *.median_cov +- head -n 600 *.locus_counts *.STR_counts *.median_cov - head *.tsv From 81c4fbeb86441482deb6a78f62bb27b01504fe71 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 22 Sep 2017 11:49:37 +1000 Subject: [PATCH 24/30] try installing r from conda --- .travis.yml | 6 ++++-- environment.yml | 1 + install-packages.R | 2 +- pipelines/pipeline_stages.groovy | 8 ++++---- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 00e1b69..cb739ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,12 @@ -language: r +language: python cache: packages install: - ./install-ci.sh # - PATH=$PATH:$PWD/tools/bin # might not be appropriate for users to add to their PATH # Install R packages - - Rscript --verbose install-packages.R + - tools/bin/Rscript --verbose install-packages.R + - ls * + - ls tools/bin/ # Create working directory - mkdir test - cp reference-data/SCA8_region.bed test/ diff --git a/environment.yml b/environment.yml index d2402ec..ebb7505 100644 --- a/environment.yml +++ b/environment.yml @@ -4,6 +4,7 @@ channels: - bioconda dependencies: - python=3* +- R - BioPython - PyVCF - pysam diff --git a/install-packages.R b/install-packages.R index 4ad8ec0..bccefd9 100644 --- a/install-packages.R +++ b/install-packages.R @@ -1,4 +1,4 @@ ## Create the personal library if it doesn't exist. Ignore a warning if the directory already exists. dir.create(Sys.getenv("R_LIBS_USER"), showWarnings = TRUE, recursive = TRUE) ## Install packages -install.packages(c('optparse','plyr','dplyr','tidyr','reshape2')) +install.packages(c('optparse','plyr','dplyr','tidyr','reshape2'), repos="http://cran.rstudio.com/") diff --git a/pipelines/pipeline_stages.groovy b/pipelines/pipeline_stages.groovy index 0397fcf..2e72834 100644 --- a/pipelines/pipeline_stages.groovy +++ b/pipelines/pipeline_stages.groovy @@ -101,13 +101,13 @@ estimate_size = { produce("STRs.tsv") { if(CONTROL=="") { exec """ - Rscript $STRETCH/scripts/estimateSTR.R - --model $STRETCH/scripts/STRcov.model.csv + $STRETCH/tools/bin/Rscript $STRETCH/scripts/estimateSTR.R + --model $STRETCH/scripts/STRcov.model.csv """ } else { exec """ - Rscript $STRETCH/scripts/estimateSTR.R - --model $STRETCH/scripts/STRcov.model.csv + $STRETCH/tools/bin/Rscript $STRETCH/scripts/estimateSTR.R + --model $STRETCH/scripts/STRcov.model.csv --control $CONTROL """ } From b48cc6d9671ba0ff923cedd44161f6b32836a87f Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 22 Sep 2017 12:08:46 +1000 Subject: [PATCH 25/30] uncomment estimate size stage --- pipelines/STRetch_exome_pipeline.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/STRetch_exome_pipeline.groovy b/pipelines/STRetch_exome_pipeline.groovy index edc6187..0e9b168 100644 --- a/pipelines/STRetch_exome_pipeline.groovy +++ b/pipelines/STRetch_exome_pipeline.groovy @@ -17,6 +17,6 @@ run { median_cov_region + STR_coverage + STR_locus_counts - ] //+ - //estimate_size + ] + + estimate_size } From 0c154f4b7f037a7a13b23ce6365521ba1bd96370 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 22 Sep 2017 13:58:10 +1000 Subject: [PATCH 26/30] reduce intermediate outputs in travis script --- .travis.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index cb739ad..5c9221d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,8 +5,6 @@ install: # - PATH=$PATH:$PWD/tools/bin # might not be appropriate for users to add to their PATH # Install R packages - tools/bin/Rscript --verbose install-packages.R - - ls * - - ls tools/bin/ # Create working directory - mkdir test - cp reference-data/SCA8_region.bed test/ @@ -16,5 +14,5 @@ script: # Run the test data - ../tools/bin/bpipe run ../pipelines/STRetch_exome_pipeline.groovy ../test-data/*.fastq.gz after_script: -- head -n 600 *.locus_counts *.STR_counts *.median_cov +- head *.locus_counts *.STR_counts *.median_cov - head *.tsv From 3ced752bf02cd32897d8e95de0140b34308dff01 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 22 Sep 2017 14:06:20 +1000 Subject: [PATCH 27/30] travis passing badge --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6761a09..4573a27 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![Build Status](https://travis-ci.org/hdashnow/STRetch-paper.svg?branch=ci)](https://travis-ci.org/hdashnow/STRetch-paper) + **Update:** the STRetch paper is now available! If using STRetch, please cite: From 268f85afcd40ccac916c4a673950b0fe562b62d6 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 22 Sep 2017 14:19:24 +1000 Subject: [PATCH 28/30] move testing installation stuff to .testing directory --- install-ci.sh => .testing/install-ci.sh | 0 install-packages.R => .testing/install-packages.R | 0 .travis.yml | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename install-ci.sh => .testing/install-ci.sh (100%) rename install-packages.R => .testing/install-packages.R (100%) diff --git a/install-ci.sh b/.testing/install-ci.sh similarity index 100% rename from install-ci.sh rename to .testing/install-ci.sh diff --git a/install-packages.R b/.testing/install-packages.R similarity index 100% rename from install-packages.R rename to .testing/install-packages.R diff --git a/.travis.yml b/.travis.yml index 5c9221d..60e845b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,10 @@ language: python cache: packages install: - - ./install-ci.sh + - ./.testing/install-ci.sh # - PATH=$PATH:$PWD/tools/bin # might not be appropriate for users to add to their PATH # Install R packages - - tools/bin/Rscript --verbose install-packages.R + - tools/bin/Rscript --verbose ./.testing/install-packages.R # Create working directory - mkdir test - cp reference-data/SCA8_region.bed test/ From ffd78f73c75e200fa9ea564815f86d70766d6427 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Fri, 22 Sep 2017 17:30:11 +1000 Subject: [PATCH 29/30] return PATH to previous state --- pipelines/pipeline_stages.groovy | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pipelines/pipeline_stages.groovy b/pipelines/pipeline_stages.groovy index 2e72834..243b58f 100644 --- a/pipelines/pipeline_stages.groovy +++ b/pipelines/pipeline_stages.groovy @@ -23,13 +23,13 @@ set_sample_info = { if(!file(REF).exists()) fail """ - The configured decoy reference file: $REF could not be found. + The configured decoy reference file: $REF could not be found. Please check pipelines/pipeline_config.groovy to make sure this is set correctly """ [bwa,samtools,bedtools,goleft,python].each { tool -> - if(!file(tool).exists()) + if(!file(tool).exists()) fail """ The location of tool $tool does not appear to exist. @@ -88,11 +88,13 @@ STR_coverage = { STR_locus_counts = { transform("bam") to ("locus_counts") { exec """ - PATH=$PATH:$STRETCH/tools/bin; + STRPATH=$PATH; + PATH=$STRETCH/tools/bin:$PATH; $python $STRETCH/scripts/identify_locus.py --bam $input.bam --bed $STR_BED --output $output.locus_counts + ;PATH=$STRPATH """ } } @@ -106,7 +108,7 @@ estimate_size = { """ } else { exec """ - $STRETCH/tools/bin/Rscript $STRETCH/scripts/estimateSTR.R + $STRETCH/tools/bin/Rscript $STRETCH/scripts/estimateSTR.R --model $STRETCH/scripts/STRcov.model.csv --control $CONTROL """ @@ -145,12 +147,12 @@ doc "Calculate the median coverage over the target region" @filter('slop') str_targets = { - + doc "Create bed file of region likely to contain STR reads and their mates" SLOP=800 - //produce(STR_BED[0..-3] + 'slop.bed') { + //produce(STR_BED[0..-3] + 'slop.bed') { exec """ $bedtools slop -b $SLOP -i $input.bed -g ${REF}.genome | $bedtools merge > $output.bed """ @@ -166,9 +168,9 @@ extract_reads_region = { produce(branch.sample + '_L001_R1.fastq.gz', branch.sample + '_L001_R2.fastq.gz') { exec """ - cat <( $samtools view -hu -L $input.bed $input.bam ) - <( $samtools view -u -f 4 $input.bam ) | - $samtools collate -Ou -n 128 - $output.prefix | + cat <( $samtools view -hu -L $input.bed $input.bam ) + <( $samtools view -u -f 4 $input.bam ) | + $samtools collate -Ou -n 128 - $output.prefix | $bedtools bamtofastq -i - -fq >(gzip -c > $output1.gz) -fq2 >(gzip -c > $output2.gz) """ } From 7f0fd587c4cd9ea9efdeecf1dc3f4e98d164cc98 Mon Sep 17 00:00:00 2001 From: Harriet Dashnow Date: Thu, 28 Sep 2017 12:55:24 +1000 Subject: [PATCH 30/30] compare with previous result, remove spaces --- .testing/STRs.benchmark.tsv | 6 ++++++ .travis.yml | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) create mode 100644 .testing/STRs.benchmark.tsv diff --git a/.testing/STRs.benchmark.tsv b/.testing/STRs.benchmark.tsv new file mode 100644 index 0000000..2b0f96b --- /dev/null +++ b/.testing/STRs.benchmark.tsv @@ -0,0 +1,6 @@ +chrom start end sample repeatunit reflen locuscoverage outlier p_adj bpInsertion repeatUnits +chr13 70713515 70713561 11 AGC 15.3 35 1.88685648944145 0.0295898153640992 316.950118548597 120.950039516199 +chr13 70713515 70713561 69 AGC 15.3 8 0.4297790752454 0.333678177749521 76.1210174457791 40.6736724819264 +chr13 70713515 70713561 1 AGC 15.3 3 -0.426744171073212 0.665217162914456 32.9115187238725 26.2705062412908 +chr13 70713515 70713561 54 AGC 15.3 2 -0.730726313557163 0.7675268301731 24.4403744973217 23.4467914991072 +chr13 70713515 70713561 49 AGC 15.3 1 -1.15916508005647 0.876805548823274 16.0677184130193 20.6559061376731 diff --git a/.travis.yml b/.travis.yml index 60e845b..f2ca165 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,18 +1,18 @@ language: python cache: packages install: - - ./.testing/install-ci.sh - # - PATH=$PATH:$PWD/tools/bin # might not be appropriate for users to add to their PATH +- ./.testing/install-ci.sh # Install R packages - - tools/bin/Rscript --verbose ./.testing/install-packages.R +- tools/bin/Rscript --verbose ./.testing/install-packages.R # Create working directory - - mkdir test - - cp reference-data/SCA8_region.bed test/ - - cd test/ +- mkdir test +- cp reference-data/SCA8_region.bed test/ +- cd test/ # command to run tests script: # Run the test data - ../tools/bin/bpipe run ../pipelines/STRetch_exome_pipeline.groovy ../test-data/*.fastq.gz +- if diff STRs.tsv ../.testing/STRs.benchmark.tsv; then echo exit 0; else echo exit 1; fi after_script: - head *.locus_counts *.STR_counts *.median_cov - head *.tsv