Skip to content

Commit

Permalink
Merge pull request #19 from hdashnow/ci
Browse files Browse the repository at this point in the history
Introduce Travis CI testing
  • Loading branch information
hdashnow authored Sep 28, 2017
2 parents b4dae20 + 7f0fd58 commit 3015dc1
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 12 deletions.
6 changes: 6 additions & 0 deletions .testing/STRs.benchmark.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
chrom start end sample repeatunit reflen locuscoverage outlier p_adj bpInsertion repeatUnits
chr13 70713515 70713561 11 AGC 15.3 35 1.88685648944145 0.0295898153640992 316.950118548597 120.950039516199
chr13 70713515 70713561 69 AGC 15.3 8 0.4297790752454 0.333678177749521 76.1210174457791 40.6736724819264
chr13 70713515 70713561 1 AGC 15.3 3 -0.426744171073212 0.665217162914456 32.9115187238725 26.2705062412908
chr13 70713515 70713561 54 AGC 15.3 2 -0.730726313557163 0.7675268301731 24.4403744973217 23.4467914991072
chr13 70713515 70713561 49 AGC 15.3 1 -1.15916508005647 0.876805548823274 16.0677184130193 20.6559061376731
158 changes: 158 additions & 0 deletions .testing/install-ci.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#!/bin/bash

## This script installs the tools required for the STRetch pipeline.
## Each tool is fetched from the web and placed into the tools/ subdirectory.
## Paths to all installed tools are written to pipelines/pipeline_config.groovy
## by the end of execution of this script. These paths can be changed if a
## different version of software is required. Note that R must be installed manually
##

# Locations used throughout the script. They are captured before changing
# directory so they stay relative to the directory the script was started in.
installdir=$PWD
refdir=$PWD/reference-data
toolspec=$PWD/pipelines/pipeline_config.groovy
# NOTE(review): template is not referenced in the visible part of this script.
template=$PWD/pipelines/config-examples/pipeline_config_template.groovy

# Tools are unpacked under tools/ and their executables linked into tools/bin.
# Abort if the directory cannot be created or entered; otherwise every
# download below would be unpacked into the wrong place.
mkdir -p tools/bin || exit 1
cd tools || exit 1

#a list of which programs need to be installed
commands="bpipe python goleft bedtools bwa samtools"

#installation method
# Downloads the bpipe 0.9.9.2 release tarball into the current directory,
# unpacks it and links its executables into ./bin.
# Returns non-zero when the download or the extraction fails, so a failed
# download is not followed by an attempt to unpack a missing archive.
function bpipe_install {
  local version=0.9.9.2
  local archive="bpipe-${version}.tar.gz"
  local status=0

  wget -O "$archive" "https://github.com/ssadedin/bpipe/releases/download/${version}/${archive}" || status=$?
  if (( status == 0 )); then
    tar -zxvf "$archive" || status=$?
  fi
  # The archive is removed whether or not the steps above succeeded.
  rm -f -- "$archive"
  (( status == 0 )) || return "$status"

  # The glob stays outside the quotes so every executable gets linked.
  ln -s "$PWD/bpipe-${version}/bin/"* "$PWD/bin/"
}

# Installs miniconda, Python 3 + required packages, BedTools and goleft
# (and any other dependencies listed in environment.yml).
# Must be run from a directory whose parent holds environment.yml (tools/ in
# this script): the environment file is read from ../ and the executables of
# the STR environment are linked into ./bin.
# Returns non-zero when the download, the installer or the environment
# creation fails, instead of carrying on with a missing installation.
function python_install {
  local installer=miniconda.sh
  local prefix="$PWD/miniconda"
  local status=0

  wget -O "$installer" https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh || status=$?
  if (( status == 0 )); then
    # -b: batch mode (no prompts), -p: installation prefix
    bash "$installer" -b -p "$prefix" || status=$?
  fi
  # The installer script is removed whether or not the steps above succeeded.
  rm -f -- "$installer"
  (( status == 0 )) || return "$status"

  "$prefix/bin/conda" env create -f ../environment.yml || return 1
  # The glob stays outside the quotes so every executable gets linked.
  ln -s "$prefix/envs/STR/bin/"* "$PWD/bin/"
  # source activate STR
}

# Downloads the bwakit 0.7.15 binary release into the current directory,
# unpacks it and links the contents of bwa.kit/ into ./bin.
# Returns non-zero when the download or the extraction fails.
function bwa_install {
  local archive=bwakit-0.7.15_x64-linux.tar.bz2
  local status=0

  # NOTE(review): --no-check-certificate disables TLS verification of the
  # download; kept as in the original, confirm it is still required.
  wget -O "$archive" --no-check-certificate "https://github.com/lh3/bwa/releases/download/v0.7.15/${archive}" || status=$?
  if (( status == 0 )); then
    tar -jxvf "$archive" || status=$?
  fi
  # The archive is removed whether or not the steps above succeeded.
  rm -f -- "$archive"
  (( status == 0 )) || return "$status"

  # The glob stays outside the quotes so every file gets linked.
  ln -s "$PWD/bwa.kit/"* "$PWD/bin/"
}

# Downloads the samtools 1.3.1 source tarball into the current directory,
# unpacks it, then builds and installs it with the current directory as
# prefix (make's "install" target is given prefix=$PWD).
# Returns non-zero when the download, the extraction or the build fails.
function samtools_install {
  local version=1.3.1
  local archive="samtools-${version}.tar.bz2"
  local status=0

  # -O pins the local file name, as the other *_install functions do, so the
  # following steps do not depend on how wget names the downloaded file.
  # NOTE(review): --no-check-certificate disables TLS verification of the
  # download; kept as in the original, confirm it is still required.
  wget -O "$archive" --no-check-certificate "https://sourceforge.net/projects/samtools/files/samtools/${version}/${archive}" || status=$?
  if (( status == 0 )); then
    tar -jxvf "$archive" || status=$?
  fi
  # The archive is removed whether or not the steps above succeeded.
  rm -f -- "$archive"
  (( status == 0 )) || return "$status"

  make prefix="$PWD" install -C "samtools-${version}/"
}

# Downloads the reference/test data archive into $refdir, unpacks it there and
# moves the *.gz, *.bam and *.bai files into $installdir/test-data.
# Globals:  refdir (read), installdir (read)
# Returns:  non-zero when the download or the extraction fails, or when the
#           test-data directory cannot be created; otherwise the status of mv.
function download {
  local archive="$refdir/reference-data.zip"
  local status=0

  # The URL is quoted because it contains '?', which the shell would
  # otherwise treat as a glob character.
  wget --no-check-certificate -O "$archive" \
    'https://ndownloader.figshare.com/articles/5353399?private_link=be9bde235448e937e468' || status=$?
  if (( status == 0 )); then
    unzip "$archive" -d "$refdir" || status=$?
    # unzip exits with 1 for warnings although extraction completed.
    (( status == 1 )) && status=0
  fi
  # The archive is removed whether or not the steps above succeeded.
  rm -f -- "$archive"
  (( status == 0 )) || return "$status"

  # -p: do not fail when test-data already exists (e.g. on a re-run).
  mkdir -p "$installdir/test-data" || return 1
  mv "$refdir"/*.gz "$refdir"/*.bam "$refdir"/*.bai "$installdir/test-data"
}

#populate toolspec
# The config file is (re)created from scratch here; later sections of the
# script append to it. The redirection target is quoted so that an
# installation path containing spaces does not cause an "ambiguous redirect".
# $installdir is expanded inside the here-document; everything else is
# written literally.
cat > "$toolspec" <<EOF
// Bpipe pipeline config file
// Paths are relative to the directory the pipeline is running in, so absolute
// paths are recommended.

// Adjust parameters
PLATFORM='illumina'

// Number of threads to use for BWA
threads=8

// For exome pipeline only ***Edit before running the exome pipeline***
EXOME_TARGET="SCA8_region.bed"

// STRetch installation location
STRETCH="$installdir"

// Paths to tools used by the pipeline
EOF

# For every required tool: if no executable of that name is present in
# ./bin, run its installer, then record the resulting path (empty if the tool
# is still missing) in the config file.
# $commands is deliberately unquoted: it is a space-separated list.
for c in $commands ; do
  c_path=$(command -v "$PWD/bin/$c" 2>/dev/null)
  if [[ -z "$c_path" ]] ; then
    echo "$c not found, fetching it"
    # Not every tool has its own installer (goleft and bedtools are provided
    # by python_install), so only call a function that actually exists.
    if declare -F "${c}_install" > /dev/null ; then
      "${c}_install"
    else
      echo "No installer is defined for $c" >&2
    fi
    c_path=$(command -v "$PWD/bin/$c" 2>/dev/null)
  fi
  echo "$c=\"$c_path\"" >> "$toolspec"
done

# Download the reference and test data unless a *dedup.sorted.bed file is
# already present in $refdir. The glob is expanded into an array first:
# passing it straight to a test breaks as soon as it matches more than one
# file. Without a match the pattern stays literal and the -f test fails.
ref_beds=( "$refdir"/*dedup.sorted.bed )
if [[ ! -f "${ref_beds[0]}" ]] ; then
  mkdir -p "$refdir"
  echo "Downloading reference and test data"
  download
fi

# Append the reference-data section to the config file. The redirection
# target is quoted so that a path containing spaces does not cause an
# "ambiguous redirect"; $refdir is expanded inside the here-document.
# NOTE(review): CONTROL is written twice and the second assignment is not
# commented out despite the "Uncomment the line below" text, so the generated
# config presumably ends up using the WGS control file. This looks deliberate
# for the CI run - confirm before changing.
cat >> "$toolspec" <<EOF

// Path to reference data
refdir="$refdir"

// Decoy reference assumed to have matching .genome file in the same directory
REF="$refdir/hg19.chr13.STRdecoys.sorted.fasta"
STR_BED="$refdir/hg19.simpleRepeat_period1-6_dedup.sorted.bed"
DECOY_BED="$refdir/STRdecoys.sorted.bed"
// By default, uses other samples in the same batch as a control
CONTROL=""
// Uncomment the line below to use a set of WGS samples as controls, or specify your own
CONTROL="$refdir/PCRfreeWGS.controls.tsv"

EOF


#loop through commands to check they are all installed
# Final_message starts out as the success text and is replaced by a warning
# as soon as one tool is missing from ./bin; it is printed at the very end of
# the script.
# $commands is deliberately unquoted: it is a space-separated list.
echo "**********************************************************"
echo "Checking that all required tools were installed:"
Final_message="All commands installed successfully!"
for c in $commands ; do
  c_path=$(command -v "$PWD/bin/$c" 2>/dev/null)
  if [[ -z "$c_path" ]] ; then
    echo -n "WARNING: $c could not be found!!!! "
    echo "You will need to download and install $c manually, then add its path to $toolspec"
    Final_message="WARNING: One or more command did not install successfully. See warning messages above. You will need to correct this before running STRetch."
  else
    echo "$c looks like it has been installed"
  fi
done

echo "**********************************************************"

#check that R is installed
# R is only looked up on the PATH, never installed by this script. Its path
# (empty when R is missing) is appended to the config file either way.
R_path=$(command -v R 2>/dev/null)
if [[ -z "$R_path" ]] ; then
  echo "R not found!"
  echo "Please go to http://www.r-project.org/ and follow the installation instructions."
  echo "Please also install the required R packages."
else
  echo "R seems to be available."
  echo "Make sure you are using the correct version of R and have installed all required packages."
fi
echo "R=\"$R_path\"" >> "$toolspec"

echo "**********************************************************"

#check for reference data
# The glob is expanded into an array first: passing it straight to a test
# breaks as soon as it matches more than one file. Without a match the
# pattern stays literal and the -f test fails.
ref_beds=( "$refdir"/*dedup.sorted.bed )
if [[ ! -f "${ref_beds[0]}" ]] ; then
  echo -n "WARNING: reference files could not be found!!!! "
  echo "You will need to download them manually, then add the path to $toolspec"
else
  echo "It looks like the reference data has been downloaded"
fi

# Summary: overall tool status followed by a reminder about the R packages.
echo "**********************************************************"
echo "$Final_message"
echo "Please make sure you have installed the required R packages:"
echo "install.packages(c('optparse','plyr','dplyr','tidyr','reshape2'))"
4 changes: 4 additions & 0 deletions .testing/install-packages.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## Create the personal library (the directory named by R_LIBS_USER) if it doesn't
## exist, including any missing parent directories. Warnings are shown, so a
## warning appears if the directory already exists; that warning can be ignored.
dir.create(Sys.getenv("R_LIBS_USER"), showWarnings = TRUE, recursive = TRUE)
## Install the R packages listed below from the repository given in repos
install.packages(c('optparse','plyr','dplyr','tidyr','reshape2'), repos="http://cran.rstudio.com/")
18 changes: 18 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Travis CI configuration: installs the STRetch tool chain, runs the exome
# pipeline on the bundled test data and compares the result to a benchmark.
language: python
cache: packages
install:
- ./.testing/install-ci.sh
# Install R packages
- tools/bin/Rscript --verbose ./.testing/install-packages.R
# Create working directory
- mkdir test
- cp reference-data/SCA8_region.bed test/
- cd test/
# command to run tests
script:
# Run the test data
- ../tools/bin/bpipe run ../pipelines/STRetch_exome_pipeline.groovy ../test-data/*.fastq.gz
# Fail the build when the output differs from the benchmark. diff exits
# non-zero on any difference; the previous form only echoed "exit 1", so the
# step always succeeded.
- diff STRs.tsv ../.testing/STRs.benchmark.tsv
after_script:
- head *.locus_counts *.STR_counts *.median_cov
- head *.tsv
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.org/hdashnow/STRetch-paper.svg?branch=ci)](https://travis-ci.org/hdashnow/STRetch-paper)

**Update:** the STRetch paper is now available!

If using STRetch, please cite:
Expand Down
2 changes: 2 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
name: STR
channels:
- conda-forge
- bioconda
dependencies:
- python=3*
- R
- BioPython
- PyVCF
- pysam
Expand Down
Empty file modified install.sh
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion pipelines/STRetch_exome_pipeline.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ run {
align_bwa + index_bam +
median_cov_region +
STR_coverage +
STR_locus_counts
STR_locus_counts
] +
estimate_size
}
25 changes: 14 additions & 11 deletions pipelines/pipeline_stages.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ set_sample_info = {

if(!file(REF).exists())
fail """
The configured decoy reference file: $REF could not be found.
The configured decoy reference file: $REF could not be found.
Please check pipelines/pipeline_config.groovy to make sure this is set correctly
"""

[bwa,samtools,bedtools,goleft,python].each { tool ->
if(!file(tool).exists())
if(!file(tool).exists())
fail """
The location of tool $tool does not appear to exist.
Expand Down Expand Up @@ -88,10 +88,13 @@ STR_coverage = {
STR_locus_counts = {
transform("bam") to ("locus_counts") {
exec """
STRPATH=$PATH;
PATH=$STRETCH/tools/bin:$PATH;
$python $STRETCH/scripts/identify_locus.py
--bam $input.bam
--bed $STR_BED
--output $output.locus_counts
;PATH=$STRPATH
"""
}
}
Expand All @@ -100,13 +103,13 @@ estimate_size = {
produce("STRs.tsv") {
if(CONTROL=="") {
exec """
Rscript $STRETCH/scripts/estimateSTR.R
--model $STRETCH/scripts/STRcov.model.csv
$STRETCH/tools/bin/Rscript $STRETCH/scripts/estimateSTR.R
--model $STRETCH/scripts/STRcov.model.csv
"""
} else {
exec """
Rscript $STRETCH/scripts/estimateSTR.R
--model $STRETCH/scripts/STRcov.model.csv
$STRETCH/tools/bin/Rscript $STRETCH/scripts/estimateSTR.R
--model $STRETCH/scripts/STRcov.model.csv
--control $CONTROL
"""
}
Expand Down Expand Up @@ -144,12 +147,12 @@ doc "Calculate the median coverage over the target region"

@filter('slop')
str_targets = {

doc "Create bed file of region likely to contain STR reads and their mates"

SLOP=800

//produce(STR_BED[0..-3] + 'slop.bed') {
//produce(STR_BED[0..-3] + 'slop.bed') {
exec """
$bedtools slop -b $SLOP -i $input.bed -g ${REF}.genome | $bedtools merge > $output.bed
"""
Expand All @@ -165,9 +168,9 @@ extract_reads_region = {
produce(branch.sample + '_L001_R1.fastq.gz', branch.sample + '_L001_R2.fastq.gz') {
exec """
cat <( $samtools view -hu -L $input.bed $input.bam )
<( $samtools view -u -f 4 $input.bam ) |
$samtools collate -Ou -n 128 - $output.prefix |
cat <( $samtools view -hu -L $input.bed $input.bam )
<( $samtools view -u -f 4 $input.bam ) |
$samtools collate -Ou -n 128 - $output.prefix |
$bedtools bamtofastq -i - -fq >(gzip -c > $output1.gz) -fq2 >(gzip -c > $output2.gz)
"""
}
Expand Down

0 comments on commit 3015dc1

Please sign in to comment.