-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed outstanding issues. Double check and push new release in next commit
- Loading branch information
Anoushka
committed
Sep 22, 2021
1 parent
7afecd9
commit 9662cb3
Showing
17 changed files
with
547 additions
and
703 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ | |
^\.Rproj\.user$ | ||
|
||
^doc$ | ||
^Meta$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,5 @@ | |
.Rhistory | ||
.RData | ||
.Ruserdata | ||
doc | ||
Meta |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,68 +1,63 @@ | ||
#' @import dplyr | ||
#' @importFrom magrittr %>% | ||
#' @import parallel | ||
#' @import dplyr | ||
#' @export | ||
|
||
`%>%` <- magrittr::`%>%` | ||
|
||
args <- commandArgs(trailingOnly=TRUE) | ||
## arguments : AllInfo file, exonInfo.gff, threshold, grouping factor | ||
|
||
## arguments : AllInfo file, groupingFactor, nThreads, threshold | ||
|
||
allInfo <- data.table::fread(args[1],sep="\t") | ||
allInfo <- allInfo[,1:6] ## Read in file with read - gene - celltype assignments | ||
colnames(allInfo) <- c("Read","Gene","Celltype","Barcode","UMI","Stretch") | ||
groupingFactor <- args[4] | ||
|
||
geneInfo <- allInfo %>% dplyr::select(Read,Gene,groupingFactor) | ||
|
||
print("Reading in GFF file, will take a while") | ||
|
||
exonGFF <- read.table(gzfile(args[2]))[,-c(2,3,6,8,9,11)] ## GFF file with exon info | ||
colnames(exonGFF) <- c("chr","start","end","strand","readname") | ||
exonGFF <- exonGFF %>% tidyr::separate(readname,into=c("Read","path"),sep=".path") %>% | ||
dplyr::select(-path) %>% tidyr::unite(exon, c(chr,start,end,strand),sep="_",remove=FALSE) | ||
exonGFF <- dplyr::right_join(exonGFF,geneInfo) ## Merge gene information with GFF in case of multi-mapping exons | ||
|
||
threshold <- as.integer(args[3]) | ||
numThreads <- as.integer(args[5]) | ||
|
||
|
||
uniqExons <- exonGFF %>% dplyr::select(-c(Read,groupingFactor)) %>% dplyr::distinct() ## Get unique exons to iterate over | ||
edges <- exonGFF %>% dplyr::group_by(Read) %>% dplyr::mutate(s=min(start),e=max(end)) %>% | ||
dplyr::select(Read,Gene,s,e,exon,groupingFactor) %>% dplyr::distinct() %>% dplyr::as_data_frame() | ||
print(paste0("About to start processing exons per ",groupingFactor)) | ||
|
||
calcPSI <- function(x){ ## x for line in uniqExons | ||
geneDF <- edges %>% dplyr::filter(Gene == uniqExons$Gene[x]) %>% dplyr::group_by(.dots = groupingFactor) %>% | ||
dplyr::filter(s <= uniqExons$start[x] & e >= uniqExons$end[x]) %>% as.data.frame() | ||
numReads <- geneDF %>% dplyr::group_by(.dots = groupingFactor, Read) %>% dplyr::select(Read,groupingFactor) %>% | ||
dplyr::ungroup() %>% dplyr::group_by(.dots = groupingFactor) %>% dplyr::distinct() %>% | ||
dplyr::add_count() %>% dplyr::filter(n>=threshold) %>% as.data.frame() | ||
geneDF <- geneDF %>% dplyr::filter(Read %in% numReads$Read) %>% as.data.frame() | ||
u_gf <- geneDF %>% dplyr::select(groupingFactor) %>% dplyr::distinct() ## make list of unique grouping factors | ||
psiGF <- NULL | ||
for(gf in u_gf[,1]){ ## If total reads spanning exon more than sampling rate, extract those reads | ||
reads <- geneDF %>% dplyr::filter(get(groupingFactor) == gf) | ||
psi = NULL | ||
sr <- reads %>% dplyr::select(Read) %>% dplyr::distinct() | ||
inc <- reads %>% dplyr::filter(Read %in% sr$Read) %>% | ||
dplyr::filter(exon == uniqExons$exon[x]) %>% nrow() | ||
psi <- inc/nrow(sr) ## Total reads spanning the exon = sr | ||
psi <- as.data.frame(psi) | ||
psi$inclusion <- inc | ||
psi$exclusion <- nrow(sr) - inc | ||
psi$exon <- uniqExons$exon[x] | ||
psi$Gene <- uniqExons$Gene[x] | ||
psi[,as.character(groupingFactor)] <- gf | ||
psiGF <- rbind(psiGF,psi) } | ||
return(psiGF) | ||
## Keep the first four columns plus the Exons column of the AllInfo table
## (read - gene - celltype assignments). The Exons column is taken from
## position 9 when the table has 11 columns and from position 7 otherwise.
if (ncol(allInfo) == 11) {
  allInfo <- allInfo[, c(1:4, 9)]
} else {
  allInfo <- allInfo[, c(1:4, 7)]
}

colnames(allInfo) <- c("Read", "Gene", "Celltype", "Barcode", "Exons")

## Command-line arguments: grouping column name, worker count, and the minimum
## number of rows a gene needs in allInfo to be kept.
groupingFactor <- args[2]
nThreads <- as.integer(args[3])
rpg <- as.integer(args[4])

outDir <- 'ExonQuantOutput/'

## Drop genes with fewer than rpg rows.
allInfo_SE <- allInfo %>%
  dplyr::group_by(Gene) %>%
  dplyr::add_count() %>%
  dplyr::filter(n >= rpg) %>%
  dplyr::select(-n) %>%
  dplyr::ungroup() %>%
  as.data.frame()

## Split each Exons string on "_" once (instead of twice per row under
## rowwise()): the 2nd field becomes `start`, the 2nd-to-last field `end`.
## Both stay character, exactly as before.
exonFields <- strsplit(allInfo_SE$Exons, "_")
allInfo_SE$start <- vapply(exonFields, function(f) f[2], character(1))
allInfo_SE$end <- vapply(exonFields, function(f) rev(f)[2], character(1))

## One row per exon entry (";%;"-separated). Keep reads with at least three
## entries, then drop the first and last entry of every read.
internalExons <- allInfo_SE %>%
  tidyr::separate_rows(Exons, sep = ";%;") %>%
  dplyr::filter(Exons != "") %>%
  dplyr::group_by(Read) %>%
  dplyr::add_count() %>%
  dplyr::filter(n >= 3) %>%
  dplyr::slice(2:(dplyr::n() - 1))

## Distinct exon / gene pairs to iterate over.
uniqExons <- internalExons %>%
  dplyr::ungroup() %>%
  dplyr::select(Exons, Gene) %>%
  dplyr::distinct()

## Number of rows per exon, gene and grouping value, stored as "Inclusion".
## across(all_of()) replaces the deprecated group_by(.dots = ) argument.
inclusionCounts <- internalExons %>%
  dplyr::ungroup() %>%
  dplyr::select(Exons, Gene, dplyr::all_of(groupingFactor)) %>%
  dplyr::group_by(Exons, Gene, dplyr::across(dplyr::all_of(groupingFactor))) %>%
  dplyr::add_count(name = "Inclusion") %>%
  dplyr::distinct() %>%
  as.data.frame()

## One row per read with its gene, grouping value and start/end fields.
readSE <- internalExons %>%
  dplyr::select(Read, Gene, dplyr::all_of(groupingFactor), start, end) %>%
  dplyr::distinct() %>%
  as.data.frame()
|
||
## For one gene, count per exon and grouping value how many reads span the
## exon ("Total") and how many contain it ("Inclusion"), derive PSI and
## Exclusion, and append the table to <outDir>/InclusionExclusionCounts.tsv.
##
## Uses objects defined at script level: uniqExons, readSE, inclusionCounts,
## groupingFactor and outDir.
##
## gene: value matched against the Gene column of those tables.
## Returns NULL invisibly; the result is the rows written to the TSV file.
checkSpanningReads <- function(gene){
  exons <- uniqExons %>% dplyr::filter(Gene == gene) %>%
    tidyr::separate(Exons, into = c("chr", "s", "e", "strand"), sep = "_", remove = FALSE)
  reads <- readSE %>% dplyr::filter(Gene == gene)

  ## s/e come out of separate() as strings, and start/end are string fields
  ## too, so convert before comparing (a string comparison is lexicographic).
  ## The test must be the elementwise `&`: `&&` inspects a single value and is
  ## an error for longer vectors on R >= 4.3.
  spanningReads <- dplyr::left_join(reads, exons, by = "Gene") %>%
    dplyr::filter(as.numeric(s) >= as.numeric(start) &
                    as.numeric(e) <= as.numeric(end)) %>%
    dplyr::select(Exons, Gene, dplyr::all_of(groupingFactor)) %>%
    dplyr::group_by(Exons, Gene, dplyr::across(dplyr::all_of(groupingFactor))) %>%
    dplyr::add_count(name = "Total") %>%
    dplyr::distinct() %>%
    as.data.frame()

  inclusionReads <- inclusionCounts %>% dplyr::filter(Gene == gene)

  ## Right join keeps every spanned exon; exons with no inclusion rows get
  ## NA, which is turned into 0 before computing PSI.
  inc_tot <- dplyr::right_join(inclusionReads, spanningReads,
                               by = c('Exons', "Gene", groupingFactor)) %>%
    replace(is.na(.), 0) %>%
    dplyr::mutate(PSI = Inclusion/Total, Exclusion = Total - Inclusion) %>%
    as.data.frame()

  ## Header only on the first write, plain append afterwards.
  ## NOTE(review): this function is run through parallel::mclapply, so several
  ## workers may pass the file.exists() check at once - confirm that duplicate
  ## headers / interleaved rows are acceptable or collect results and write once.
  fName <- file.path(outDir, "InclusionExclusionCounts.tsv")
  firstWrite <- !file.exists(fName)
  write.table(inc_tot, fName, sep = "\t", append = !firstWrite,
              quote = FALSE, row.names = FALSE, col.names = firstWrite)

  ## The previous bare `return` handed back the `return` primitive itself.
  invisible(NULL)
}
|
||
|
||
sampledPSI <- parallel::mclapply(1:nrow(uniqExons), function(x) calcPSI(x), mc.cores=numThreads) | ||
#sampledPSI <- parallel::mclapply(1:200, function(x) calcPSI(x), mc.cores=28) ## testing purposes | ||
|
||
print("Converting to dataframe") | ||
psiDF <- dplyr::bind_rows(sampledPSI) %>% dplyr::select(exon,Gene,groupingFactor,inclusion,exclusion,psi) | ||
|
||
write.table(psiDF, file="ExonQuantOutput/InclusionExclusionCounts.tsv", | ||
sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE) | ||
## Process every gene of uniqExons on nThreads forked workers; each call
## writes its own rows to the output table.
geneIds <- unique(uniqExons$Gene)
byGene <- parallel::mclapply(geneIds, checkSpanningReads, mc.cores = nThreads)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.