Skip to content

Commit

Permalink
Updating docs
Browse files Browse the repository at this point in the history
  • Loading branch information
sdentro committed Oct 17, 2015
1 parent 53a1aac commit 34f5d97
Show file tree
Hide file tree
Showing 18 changed files with 61 additions and 37 deletions.
4 changes: 4 additions & 0 deletions R/interconvertMutationBurdens.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#' Mutation burden to mutation copy number
#'
#' Function to convert mutation burdens into mutation copy number
#' @param burden A vector containing mutation burdens
#' @param totalCopyNumber A vector with total tumour copy number
Expand All @@ -12,6 +14,8 @@ mutationBurdenToMutationCopyNumber = function(burden, totalCopyNumber, cellulari
return(mutCopyNumber)
}

#' Mutation copy number to mutation burden
#'
#' Function to convert mutation copy number to mutation burden
#' @param copyNumber A vector containing mutation copy number
#' @param totalCopyNumber A vector with total tumour copy number
Expand Down
31 changes: 28 additions & 3 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
ALLELECOUNTER = "alleleCounter"
LINKAGEPULL = "Linkage_pull.pl"

#' Concatenate split files
#'
#' Convenience function to concatenate a series of files specified in a file of file names.
#' This function assumes all files have the same layout.
#' @param fofn A file of file names to be concatenated
Expand All @@ -27,7 +29,8 @@ concat_files = function(fofn, inputdir, outfile, haveHeader) {
write.table(output, file=outfile, col.names=haveHeader, row.names=F, sep="\t", quote=F)
}


#' Split a file per chromosome
#'
#' Convenience function to split an input file per chromosome. All it requires is that
#' the first column of the infile specifies the chromosome. The output files will be named
#' outdir/prefixCHROMNUMBERpostfix
Expand Down Expand Up @@ -55,6 +58,8 @@ split_by_chrom = function(infile, prefix, postfix, outdir, chrom_file) {
############################################
# VCF 2 LOCI
############################################
#' Parse genome index
#'
#' Convenience function that parses a reference genome index as generated
#' by samtools faidx
#' @param fai_file The index
Expand All @@ -67,6 +72,8 @@ parseFai = function(fai_file) {
return(fai)
}

#' Parse chromosomes to ignore file
#'
#' Convenience function that parses an ignore file. This file
#' is expected to have a single column with just chromosome names
#' @param ignore_file The file specifying the chromosomes to be ignored
Expand All @@ -79,6 +86,8 @@ parseIgnore = function(ignore_file) {
return(ign)
}

#' Transform vcf to loci file
#'
#' Function that dumps the loci of snvs from a series of vcf files into a single loci file
#' @param vcf_files A vector of vcf files to be considered
#' @param fai_file Reference genome index
Expand Down Expand Up @@ -110,6 +119,8 @@ vcf2loci = function(vcf_files, fai_file, ign_file, outfile) {
############################################
# Allele counting
############################################
#' Run alleleCount
#'
#' Count the alleles for specified locations in the loci file. Expects the alleleCounter binary in $PATH
#' @param locifile A file with at least chromosome and position columns of the locations to be counted
#' @param bam A bam file
Expand Down Expand Up @@ -161,6 +172,8 @@ formatOutput = function(counts_table, v) {
return(output)
}

#' Dump allele counts from vcf for normal
#'
#' Returns an allele counts table for the normal sample
#' @param v The vcf file
#' @param centre The sequencing centre whose pipeline produced the vcf file
Expand All @@ -174,6 +187,8 @@ getCountsNormal = function(v, centre="sanger") {
return(getAlleleCounts.Sanger(v, 1))
}

#' Dump allele counts from vcf for tumour
#'
#' Returns an allele counts table for the tumour sample
#' @param v The vcf file
#' @param centre The sequencing centre whose pipeline produced the vcf file
Expand All @@ -187,6 +202,8 @@ getCountsTumour = function(v, centre="sanger") {
return(getAlleleCounts.Sanger(v, 2))
}

#' Dump allele counts from Sanger pipeline vcf
#'
#' Helper function that dumps the allele counts from a Sanger pipeline VCF file
#' @param v The vcf file
#' @param sample_col The column in which the counts are. If it's the first sample mentioned in the vcf this would be sample_col 1
Expand Down Expand Up @@ -218,6 +235,8 @@ run_linkage_pull_mut = function(output, loci_file, bam_file, bai_file) {
return(count.data)
}

#' Phase mutation to mutation
#'
#' Run mutation to mutation phasing. This function requires the Linkage_pull.pl script in $PATH.
#' @param loci_file A list of loci
#' @param phased_file File to save the output
Expand Down Expand Up @@ -311,6 +330,8 @@ run_linkage_pull_snp = function(loci_file, bam_file, bai_file, chr, pos1, ref1,
return(linked.muts)
}

#' Phase mutation to SNP/copy number
#'
#' Run mutation to copy number phasing. This function requires the Linkage_pull.pl script in $PATH.
#' Note: This function should either be run separately per chromosome and then combined with \code{\link{concat_files}}
#' or on all chromosomes in one go, but then the _allHaplotypeInfo.txt Battenberg files need to be concatenated first.
Expand Down Expand Up @@ -626,15 +647,17 @@ GetWTandMutCount <- function(loci_file, allele_frequencies_file) {
##############################################
# GetDirichletProcessInfo
##############################################
#' Create the DPClust input file
#'
#' Function that takes allele counts and a copy number profile to estimate mutation copy number,
#' cancer cell fraction and multiplicity for each point mutation.
#' @param loci_file Simple four column file with chromosome, position, reference allele and alternative allele
#' @param allele_frequencies_file Output file from alleleCounter on the specified loci
#' @param cellularity_file Full path to a Battenberg rho_and_psi output file
#' @param subclone_file Full path to a Battenberg subclones.txt output file
#' @param gender Specify male or female
#' @param SNP.phase.file Output file from mut_mut_phasing
#' @param mut.phase.file Output file from mut_cn_phasing
#' @param SNP.phase.file Output file from mut_mut_phasing, supply NA (as char) when not available
#' @param mut.phase.file Output file from mut_cn_phasing, supply NA (as char) when not available
#' @param output_file Name of the output file
#' @author sd11
#' @export
Expand All @@ -654,6 +677,8 @@ runGetDirichletProcessInfo = function(loci_file, allele_frequencies_file, cellul
##############################################
# dpIn to VCF
##############################################
#' DPClust input file to vcf
#'
#' Transform a Dirichlet input file into a VCF with the same info. It filters out mutations in areas that are not contained in the supplied genome index (fai file) or are contained in the ignore file (ign file).
#' It takes the DP input file created by runGetDirichletProcessInfo and combines the columns with the vcf file supplied. Finally it gzips and indexes the file.
#' @param vcf_infile Filename of the VCF file to use as a base
Expand Down
20 changes: 12 additions & 8 deletions inst/example/preproc_pipeline_simple.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,23 @@ vcf_file = toString(args[3]) # Full path to the vcf file with SNV calls. All cal
rho_and_psi_file = toString(args[4]) # Full path to a rho_and_psi output file from Battenberg
subclones_file = toString(args[5]) # Full path to the subclones output file from Battenberg
sex = toString(args[6]) # Specify male or female
output_dir = toString(args[7]) # Full path to where the output must be written
output_dir = toString(args[7]) # Full path to where the output should be written
fai_file = toString(args[8]) # Full path to the reference genome index file used for this sample
ign_file = toString(args[9]) # Full path to simple list of chromosome names to ignore (must contain at least Y and MT)

# Define various files
library(dpclust3p)

# Define the final output file
dpoutput_file = paste(output_dir, "/", samplename, "_allDirichletProcessInfo.txt", sep="")

# Define various temp files
loci_file = paste(output_dir, "/", samplename, "_loci.txt", sep="")
allelecounts_file = paste(output_dir, "/", samplename, "_alleleFrequencies.txt", sep="")
dpoutput_file = paste(output_dir, "/", samplename, "_allDirichletProcessInfo.txt", sep="")

# Dump loci - this function can take multiple vcf files when multiple samples from same donor
vcf2loci(vcf_file=vcf_file, fai_file=fai_file, ign_file=ign_file, loci_file=loci_file)
vcf2loci(vcf_file=vcf_file, fai_file=fai_file, ign_file=ign_file, outfile=loci_file)

# Count alleles
# Fetch allele counts
alleleCount(locifile=loci_file, bam=bam_file, outfile=allelecounts_file, min_baq=20, min_maq=35)

# Create dpIn file
Expand All @@ -36,6 +40,6 @@ runGetDirichletProcessInfo(loci_file=loci_file,
cellularity_file=rho_and_psi_file,
subclone_file=subclones_file,
gender=sex,
SNP.phase.file=NA,
mut.phase.file=NA,
output_file=dpoutput_file)
SNP.phase.file="NA",
mut.phase.file="NA",
output_file=dpoutput_file)
2 changes: 1 addition & 1 deletion man/alleleCount.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{alleleCount}
\alias{alleleCount}
\title{Count the alleles for specified locations in the loci file. Expects alleleCount binary in $PATH}
\title{Run alleleCount}
\usage{
alleleCount(locifile, bam, outfile, min_baq = 20, min_maq = 35)
}
Expand Down
3 changes: 1 addition & 2 deletions man/concat_files.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{concat_files}
\alias{concat_files}
\title{Convenience function to concatenate a series of files specified in a file of file names.
This function assumes all files have the same layout.}
\title{Concatenate split files}
\usage{
concat_files(fofn, inputdir, outfile, haveHeader)
}
Expand Down
3 changes: 1 addition & 2 deletions man/dpIn2vcf.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{dpIn2vcf}
\alias{dpIn2vcf}
\title{Transform a dirichlet input file into a VCF with the same info. It filters out mutations in areas that are not contained in the supplied genome index (fai file) or are contained in the ignore file (ign file)
It takes the DP input file created by runGetDirichletProcessInfo and combines the columns with the vcf file supplied. Finally it gzips and indexes the file}
\title{DPClust input file to vcf}
\usage{
dpIn2vcf(vcf_infile, dpIn_file, vcf_outfile, fai_file, ign_file,
genome = "hg19")
Expand Down
2 changes: 1 addition & 1 deletion man/getAlleleCounts.Sanger.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{getAlleleCounts.Sanger}
\alias{getAlleleCounts.Sanger}
\title{Helper function that dumps the allele counts from a Sanger pipeline VCF file}
\title{Dump allele counts from Sanger pipeline vcf}
\usage{
getAlleleCounts.Sanger(v, sample_col)
}
Expand Down
2 changes: 1 addition & 1 deletion man/getCountsNormal.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{getCountsNormal}
\alias{getCountsNormal}
\title{Returns an allele counts table for the normal sample}
\title{Dump allele counts from vcf for normal}
\usage{
getCountsNormal(v, centre = "sanger")
}
Expand Down
2 changes: 1 addition & 1 deletion man/getCountsTumour.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{getCountsTumour}
\alias{getCountsTumour}
\title{Returns an allele counts table for the tumour sample}
\title{Dump allele counts from vcf for tumour}
\usage{
getCountsTumour(v, centre = "sanger")
}
Expand Down
4 changes: 1 addition & 3 deletions man/mut_cn_phasing.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{mut_cn_phasing}
\alias{mut_cn_phasing}
\title{Run mutation to copy number phasing. This function requires the Linkage_pull.pl script in $PATH.
Note: This function should either be run separately per chromosome and then combined with \code{\link{concat_files}}
or on all chromsomes in one go, but then the _allHaplotypeInfo.txt Battenberg files need to be concatenated first.}
\title{Phase mutation to SNP/copy number}
\usage{
mut_cn_phasing(loci_file, phased_file, hap_file, bam_file, bai_file, outfile,
max_distance)
Expand Down
2 changes: 1 addition & 1 deletion man/mut_mut_phasing.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{mut_mut_phasing}
\alias{mut_mut_phasing}
\title{Run mutation to mutation phasing. This function requires the Linkage_pull.pl script in $PATH.}
\title{Phase mutation to mutation}
\usage{
mut_mut_phasing(loci_file, phased_file, bam_file, bai_file, max_distance)
}
Expand Down
2 changes: 1 addition & 1 deletion man/mutationBurdenToMutationCopyNumber.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
% Please edit documentation in R/interconvertMutationBurdens.R
\name{mutationBurdenToMutationCopyNumber}
\alias{mutationBurdenToMutationCopyNumber}
\title{Function to convert mutation burdens into mutation copy number}
\title{Mutation burden to mutation copy number}
\usage{
mutationBurdenToMutationCopyNumber(burden, totalCopyNumber, cellularity,
normalCopyNumber = rep(2, length(burden)))
Expand Down
2 changes: 1 addition & 1 deletion man/mutationCopyNumberToMutationBurden.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
% Please edit documentation in R/interconvertMutationBurdens.R
\name{mutationCopyNumberToMutationBurden}
\alias{mutationCopyNumberToMutationBurden}
\title{Function to convert mutation copy number to mutation burden}
\title{Mutation copy number to mutation burden}
\usage{
mutationCopyNumberToMutationBurden(copyNumber, totalCopyNumber, cellularity,
normalCopyNumber = rep(2, length(copyNumber)))
Expand Down
3 changes: 1 addition & 2 deletions man/parseFai.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{parseFai}
\alias{parseFai}
\title{Convenience function that parses a reference genome index as generated
by samtools index}
\title{Parse genome index}
\usage{
parseFai(fai_file)
}
Expand Down
3 changes: 1 addition & 2 deletions man/parseIgnore.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{parseIgnore}
\alias{parseIgnore}
\title{Convenience function that parses an ignore file. This file
is expected to have a single column with just chromosome names}
\title{Parse chromosomes to ignore file}
\usage{
parseIgnore(ignore_file)
}
Expand Down
7 changes: 3 additions & 4 deletions man/runGetDirichletProcessInfo.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{runGetDirichletProcessInfo}
\alias{runGetDirichletProcessInfo}
\title{Function that takes allele counts and a copy number profile to estimate mutation copy number,
cancer cell fraction and multiplicity for each point mutation.}
\title{Create the DPClust input file}
\usage{
runGetDirichletProcessInfo(loci_file, allele_frequencies_file, cellularity_file,
subclone_file, gender, SNP.phase.file, mut.phase.file, output_file)
Expand All @@ -19,9 +18,9 @@ runGetDirichletProcessInfo(loci_file, allele_frequencies_file, cellularity_file,

\item{gender}{Specify male or female}

\item{SNP.phase.file}{Output file from mut_mut_phasing}
\item{SNP.phase.file}{Output file from mut_mut_phasing, supply NA (as char) when not available}

\item{mut.phase.file}{Output file from mut_cn_phasing}
\item{mut.phase.file}{Output file from mut_cn_phasing, supply NA (as char) when not available}

\item{output_file}{Name of the output file}
}
Expand Down
4 changes: 1 addition & 3 deletions man/split_by_chrom.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{split_by_chrom}
\alias{split_by_chrom}
\title{Convenience function to split an input file per chromosome. All it requires is that
the infile has as first column chromosome specification. The output files will be named
outdir/prefixCHROMNUMBERpostfix}
\title{Split a file per chromosome}
\usage{
split_by_chrom(infile, prefix, postfix, outdir, chrom_file)
}
Expand Down
2 changes: 1 addition & 1 deletion man/vcf2loci.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
% Please edit documentation in R/preprocessing.R
\name{vcf2loci}
\alias{vcf2loci}
\title{Function that dumps the loci of snvs from a series of vcf files into a single loci file}
\title{Transform vcf to loci file}
\usage{
vcf2loci(vcf_files, fai_file, ign_file, outfile)
}
Expand Down

0 comments on commit 34f5d97

Please sign in to comment.