Updating docs

Wedge-lab · Oct 17, 2015 · 34f5d97 · 34f5d97
1 parent 53a1aac
commit 34f5d97
Show file tree

Hide file tree

Showing 18 changed files with 61 additions and 37 deletions.
diff --git a/R/interconvertMutationBurdens.R b/R/interconvertMutationBurdens.R
@@ -1,3 +1,5 @@
+#' Mutation burden to mutation copy number
+#' 
 #' Function to convert mutation burdens into mutation copy number
 #' @param burden A vector containing mutation burdens
 #' @param totalCopyNumber A vector with total tumour copy number
@@ -12,6 +14,8 @@ mutationBurdenToMutationCopyNumber = function(burden, totalCopyNumber, cellulari
   return(mutCopyNumber)
 }
 
+#' Mutation copy number to mutation burden
+#' 
 #' Function to convert mutation copy number to mutation burden
 #' @param copyNumber A vector containing mutation copy number
 #' @param totalCopyNumber A vector with total tumour copy number

diff --git a/R/preprocessing.R b/R/preprocessing.R
@@ -1,6 +1,8 @@
 ALLELECOUNTER = "alleleCounter"
 LINKAGEPULL = "Linkage_pull.pl"
 
+#' Concatenate split files
+#' 
 #' Convenience function to concatenate a series of files specified in a file of file names.
 #' This function assumes all files have the same layout.
 #' @param fofn A file of file names to be concatenated
@@ -27,7 +29,8 @@ concat_files = function(fofn, inputdir, outfile, haveHeader) {
   write.table(output, file=outfile, col.names=haveHeader, row.names=F, sep="\t", quote=F)
 }
 
-
+#' Split a file per chromosome
+#'
 #' Convenience function to split an input file per chromosome. All it requires is that
 #' the infile has as first column chromosome specification. The output files will be named
 #' outdir/prefixCHROMNUMBERpostfix
@@ -55,6 +58,8 @@ split_by_chrom = function(infile, prefix, postfix, outdir, chrom_file) {
 ############################################
 # VCF 2 LOCI
 ############################################
+#' Parse genome index
+#' 
 #' Convenience function that parses a reference genome index as generated
 #' by samtools faidx
 #' @param fai_file The index
@@ -67,6 +72,8 @@ parseFai = function(fai_file) {
   return(fai)
 }
 
+#' Parse chromosomes to ignore file
+#' 
 #' Convenience function that parses an ignore file. This file
 #' is expected to have a single column with just chromosome names
 #' @param ignore_file The file specifying the chromosomes to be ignored
@@ -79,6 +86,8 @@ parseIgnore = function(ignore_file) {
   return(ign)
 }
 
+#' Transform vcf to loci file
+#' 
 #' Function that dumps the loci of snvs from a series of vcf files into a single loci file
 #' @param vcf_files A vector of vcf files to be considered
 #' @param fai_file Reference genome index
@@ -110,6 +119,8 @@ vcf2loci = function(vcf_files, fai_file, ign_file, outfile) {
 ############################################
 # Allele counting
 ############################################
+#' Run alleleCount
+#' 
 #' Count the alleles for specified locations in the loci file. Expects alleleCount binary in $PATH
 #' @param locifile A file with at least chromosome and position columns of the locations to be counted
 #' @param bam A bam file
@@ -161,6 +172,8 @@ formatOutput = function(counts_table, v) {
   return(output)
 }
 
+#' Dump allele counts from vcf for normal
+#' 
 #' Returns an allele counts table for the normal sample
 #' @param v The vcf file
 #' @param centre The sequencing centre whose pipeline produced the vcf file
@@ -174,6 +187,8 @@ getCountsNormal = function(v, centre="sanger") {
   return(getAlleleCounts.Sanger(v, 1))
 }
 
+#' Dump allele counts from vcf for tumour
+#' 
 #' Returns an allele counts table for the tumour sample
 #' @param v The vcf file
 #' @param centre The sequencing centre whose pipeline produced the vcf file
@@ -187,6 +202,8 @@ getCountsTumour = function(v, centre="sanger") {
   return(getAlleleCounts.Sanger(v, 2))
 }
 
+#' Dump allele counts from Sanger pipeline vcf
+#' 
 #' Helper function that dumps the allele counts from a Sanger pipeline VCF file
 #' @param v The vcf file
 #' @param sample_col The column in which the counts are. If it's the first sample mentioned in the vcf this would be sample_col 1
@@ -218,6 +235,8 @@ run_linkage_pull_mut = function(output, loci_file, bam_file, bai_file) {
   return(count.data)
 }
 
+#' Phase mutation to mutation
+#' 
 #' Run mutation to mutation phasing. This function requires the Linkage_pull.pl script in $PATH.
 #' @param loci_file A list of loci
 #' @param phased_file File to save the output
@@ -311,6 +330,8 @@ run_linkage_pull_snp = function(loci_file, bam_file, bai_file, chr, pos1, ref1,
   return(linked.muts)
 }
 
+#' Phase mutation to SNP/copy number
+#' 
 #' Run mutation to copy number phasing. This function requires the Linkage_pull.pl script in $PATH.
 #' Note: This function should either be run separately per chromosome and then combined with \code{\link{concat_files}}
 #' or on all chromosomes in one go, but then the _allHaplotypeInfo.txt Battenberg files need to be concatenated first.
@@ -626,15 +647,17 @@ GetWTandMutCount <- function(loci_file, allele_frequencies_file) {
 ##############################################
 # GetDirichletProcessInfo
 ##############################################
+#' Create the DPClust input file
+#' 
 #' Function that takes allele counts and a copy number profile to estimate mutation copy number,
 #' cancer cell fraction and multiplicity for each point mutation.
 #' @param loci_file Simple four column file with chromosome, position, reference allele and alternative allele
 #' @param allele_frequencies_file Output file from alleleCounter on the specified loci
 #' @param cellularity_file Full path to a Battenberg rho_and_psi output file
 #' @param subclone_file Full path to a Battenberg subclones.txt output file
 #' @param gender Specify male or female
-#' @param SNP.phase.file Output file from mut_mut_phasing
-#' @param mut.phase.file Output file from mut_cn_phasing
+#' @param SNP.phase.file Output file from mut_mut_phasing, supply NA (as char) when not available
+#' @param mut.phase.file Output file from mut_cn_phasing, supply NA (as char) when not available
 #' @param output_file Name of the output file
 #' @author sd11
 #' @export
@@ -654,6 +677,8 @@ runGetDirichletProcessInfo = function(loci_file, allele_frequencies_file, cellul
 ##############################################
 # dpIn to VCF
 ##############################################
+#' DPClust input file to vcf
+#' 
 #' Transform a dirichlet input file into a VCF with the same info. It filters out mutations in areas that are not contained in the supplied genome index (fai file) or are contained in the ignore file (ign file)
 #' It takes the DP input file created by runGetDirichletProcessInfo and combines the columns with the vcf file supplied. Finally it gzips and indexes the file
 #' @param vcf_infile Filename of the VCF file to use as a base

diff --git a/inst/example/preproc_pipeline_simple.R b/inst/example/preproc_pipeline_simple.R
@@ -15,19 +15,23 @@ vcf_file = toString(args[3]) # Full path to the vcf file with SNV calls. All cal
 rho_and_psi_file = toString(args[4]) # Full path to a rho_and_psi output file from Battenberg
 subclones_file = toString(args[5]) # Full path to the subclones output file from Battenberg
 sex = toString(args[6]) # Specify male or female
-output_dir = toString(args[7]) # Full path to where the output must be written
+output_dir = toString(args[7]) # Full path to where the output should be written
 fai_file = toString(args[8]) # Full path to the reference genome index file used for this sample
 ign_file = toString(args[9]) # Full path to simple list of chromosome names to ignore (must contain at least Y and MT)
 
-# Define various files
+library(dpclust3p)
+
+# Define the final output file
+dpoutput_file = paste(output_dir, "/", samplename, "_allDirichletProcessInfo.txt", sep="")
+
+# Define various temp files
 loci_file = paste(output_dir, "/", samplename, "_loci.txt", sep="")
 allelecounts_file = paste(output_dir, "/", samplename, "_alleleFrequencies.txt", sep="")
-dpoutput_file = paste(output_dir, "/", samplename, "_allDirichletProcessInfo.txt", sep="")
 
 # Dump loci - this function can take multiple vcf files when multiple samples from same donor
-vcf2loci(vcf_file=vcf_file, fai_file=fai_file, ign_file=ign_file, loci_file=loci_file)
+vcf2loci(vcf_file=vcf_file, fai_file=fai_file, ign_file=ign_file, outfile=loci_file)
 
-# Count alleles
+# Fetch allele counts
 alleleCount(locifile=loci_file, bam=bam_file, outfile=allelecounts_file, min_baq=20, min_maq=35)
 
 # Create dpIn file
@@ -36,6 +40,6 @@ runGetDirichletProcessInfo(loci_file=loci_file,
                            cellularity_file=rho_and_psi_file, 
                            subclone_file=subclones_file, 
                            gender=sex, 
-                           SNP.phase.file=NA, 
-                           mut.phase.file=NA, 
-                           output_file=dpoutput_file)
+                           SNP.phase.file="NA", 
+                           mut.phase.file="NA", 
+                           output_file=dpoutput_file)
diff --git a/man/alleleCount.Rd b/man/alleleCount.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{alleleCount}
 \alias{alleleCount}
-\title{Count the alleles for specified locations in the loci file. Expects alleleCount binary in $PATH}
+\title{Run alleleCount}
 \usage{
 alleleCount(locifile, bam, outfile, min_baq = 20, min_maq = 35)
 }

diff --git a/man/concat_files.Rd b/man/concat_files.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{concat_files}
 \alias{concat_files}
-\title{Convenience function to concatenate a series of files specified in a file of file names.
-This function assumes all files have the same layout.}
+\title{Concatenate split files}
 \usage{
 concat_files(fofn, inputdir, outfile, haveHeader)
 }

diff --git a/man/dpIn2vcf.Rd b/man/dpIn2vcf.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{dpIn2vcf}
 \alias{dpIn2vcf}
-\title{Transform a dirichlet input file into a VCF with the same info. It filters out mutations in areas that are not contained in the supplied genome index (fai file) or are contained in the ignore file (ign file)
-It takes the DP input file created by runGetDirichletProcessInfo and combines the columns with the vcf file supplied. Finally it gzips and indexes the file}
+\title{DPClust input file to vcf}
 \usage{
 dpIn2vcf(vcf_infile, dpIn_file, vcf_outfile, fai_file, ign_file,
   genome = "hg19")

diff --git a/man/getAlleleCounts.Sanger.Rd b/man/getAlleleCounts.Sanger.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{getAlleleCounts.Sanger}
 \alias{getAlleleCounts.Sanger}
-\title{Helper function that dumps the allele counts from a Sanger pipeline VCF file}
+\title{Dump allele counts from Sanger pipeline vcf}
 \usage{
 getAlleleCounts.Sanger(v, sample_col)
 }

diff --git a/man/getCountsNormal.Rd b/man/getCountsNormal.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{getCountsNormal}
 \alias{getCountsNormal}
-\title{Returns an allele counts table for the normal sample}
+\title{Dump allele counts from vcf for normal}
 \usage{
 getCountsNormal(v, centre = "sanger")
 }

diff --git a/man/getCountsTumour.Rd b/man/getCountsTumour.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{getCountsTumour}
 \alias{getCountsTumour}
-\title{Returns an allele counts table for the tumour sample}
+\title{Dump allele counts from vcf for tumour}
 \usage{
 getCountsTumour(v, centre = "sanger")
 }

diff --git a/man/mut_cn_phasing.Rd b/man/mut_cn_phasing.Rd
@@ -2,9 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{mut_cn_phasing}
 \alias{mut_cn_phasing}
-\title{Run mutation to copy number phasing. This function requires the Linkage_pull.pl script in $PATH.
-Note: This function should either be run separately per chromosome and then combined with \code{\link{concat_files}}
-or on all chromsomes in one go, but then the _allHaplotypeInfo.txt Battenberg files need to be concatenated first.}
+\title{Phase mutation to SNP/copy number}
 \usage{
 mut_cn_phasing(loci_file, phased_file, hap_file, bam_file, bai_file, outfile,
   max_distance)

diff --git a/man/mut_mut_phasing.Rd b/man/mut_mut_phasing.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{mut_mut_phasing}
 \alias{mut_mut_phasing}
-\title{Run mutation to mutation phasing. This function requires the Linkage_pull.pl script in $PATH.}
+\title{Phase mutation to mutation}
 \usage{
 mut_mut_phasing(loci_file, phased_file, bam_file, bai_file, max_distance)
 }

diff --git a/man/mutationBurdenToMutationCopyNumber.Rd b/man/mutationBurdenToMutationCopyNumber.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/interconvertMutationBurdens.R
 \name{mutationBurdenToMutationCopyNumber}
 \alias{mutationBurdenToMutationCopyNumber}
-\title{Function to convert mutation burdens into mutation copy number}
+\title{Mutation burden to mutation copy number}
 \usage{
 mutationBurdenToMutationCopyNumber(burden, totalCopyNumber, cellularity,
   normalCopyNumber = rep(2, length(burden)))

diff --git a/man/mutationCopyNumberToMutationBurden.Rd b/man/mutationCopyNumberToMutationBurden.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/interconvertMutationBurdens.R
 \name{mutationCopyNumberToMutationBurden}
 \alias{mutationCopyNumberToMutationBurden}
-\title{Function to convert mutation copy number to mutation burden}
+\title{Mutation copy number to mutation burden}
 \usage{
 mutationCopyNumberToMutationBurden(copyNumber, totalCopyNumber, cellularity,
   normalCopyNumber = rep(2, length(copyNumber)))

diff --git a/man/parseFai.Rd b/man/parseFai.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{parseFai}
 \alias{parseFai}
-\title{Convenience function that parses a reference genome index as generated
-by samtools index}
+\title{Parse genome index}
 \usage{
 parseFai(fai_file)
 }

diff --git a/man/parseIgnore.Rd b/man/parseIgnore.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{parseIgnore}
 \alias{parseIgnore}
-\title{Convenience function that parses an ignore file. This file
-is expected to have a single column with just chromosome names}
+\title{Parse chromosomes to ignore file}
 \usage{
 parseIgnore(ignore_file)
 }

diff --git a/man/runGetDirichletProcessInfo.Rd b/man/runGetDirichletProcessInfo.Rd
@@ -2,8 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{runGetDirichletProcessInfo}
 \alias{runGetDirichletProcessInfo}
-\title{Function that takes allele counts and a copy number profile to estimate mutation copy number,
-cancer cell fraction and multiplicity for each point mutation.}
+\title{Create the DPClust input file}
 \usage{
 runGetDirichletProcessInfo(loci_file, allele_frequencies_file, cellularity_file,
   subclone_file, gender, SNP.phase.file, mut.phase.file, output_file)
@@ -19,9 +18,9 @@ runGetDirichletProcessInfo(loci_file, allele_frequencies_file, cellularity_file,
 
 \item{gender}{Specify male or female}
 
-\item{SNP.phase.file}{Output file from mut_mut_phasing}
+\item{SNP.phase.file}{Output file from mut_mut_phasing, supply NA (as char) when not available}
 
-\item{mut.phase.file}{Output file from mut_cn_phasing}
+\item{mut.phase.file}{Output file from mut_cn_phasing, supply NA (as char) when not available}
 
 \item{output_file}{Name of the output file}
 }

diff --git a/man/split_by_chrom.Rd b/man/split_by_chrom.Rd
@@ -2,9 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{split_by_chrom}
 \alias{split_by_chrom}
-\title{Convenience function to split an input file per chromosome. All it requires is that
-the infile has as first column chromosome specification. The output files will be named
-outdir/prefixCHROMNUMBERpostfix}
+\title{Split a file per chromosome}
 \usage{
 split_by_chrom(infile, prefix, postfix, outdir, chrom_file)
 }

diff --git a/man/vcf2loci.Rd b/man/vcf2loci.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/preprocessing.R
 \name{vcf2loci}
 \alias{vcf2loci}
-\title{Function that dumps the loci of snvs from a series of vcf files into a single loci file}
+\title{Transform vcf to loci file}
 \usage{
 vcf2loci(vcf_files, fai_file, ign_file, outfile)
 }