-
Notifications
You must be signed in to change notification settings - Fork 21
/
compile_results.R
executable file
·64 lines (52 loc) · 2.41 KB
/
compile_results.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
options(echo=F)
# make a list of the summary files from the pipe-line
args = commandArgs(trailingOnly = TRUE)
out_name=args[1]
sfiles=args[2:length(args)]
summary_files=sfiles
sfiles=sapply(strsplit(sfiles,"/"),function(x){paste(x[(length(x)-1):length(x)],collapse="/")})
dir=sapply(strsplit(sfiles,"/"),function(x){x[1]})
message("Compiling the results from:")
message(paste(dir,collapse=" "))
#check to see if the summary file was even made:
#exists=file.exists(summary_files)
info = file.info(summary_files)
exists = summary_files %in% rownames(info[!is.na(info$size) & info$size>0, ])
if(sum(!exists)>0){
show("No fusions were found for the following samples:")
show(dir[!exists])
}
dir=dir[exists]
summary_files=summary_files[exists]
# parse each file:
full_list<-data.frame()
for(i in 1:length(dir)){
single_list=read.delim(summary_files[i],stringsAsFactors=F)
single_list$sample<-rep(dir[i],length(single_list$transcript))
#rearrange
n=dim(single_list)[2]
full_list<-rbind(full_list,single_list)
}
colnames(full_list)<-gsub("_"," ",colnames(full_list))
colnames(full_list)[colnames(full_list)=="transcript"]<-"contig"
colnames(full_list)[colnames(full_list)=="gap"]<-"gap (kb)"
#reorder
full_list=full_list[,c("sample","fusion genes","chrom1","base1","strand1","chrom2","base2","strand2",
"gap (kb)","spanning pairs","spanning reads",
"inframe","aligns","rearrangement",
"contig","contig break","classification","known")]
# "contig","contig break","classification","known","geneCounts1","geneCounts2","exon1","exon2")]
#first order on the gap size (useful for direct mode where the spanning reads are mostly just 1)
full_list=full_list[order(as.numeric(full_list$`gap (kb)`),decreasing=F),]
#now order on the number of supporting reads
supporting_reads=full_list$`spanning pairs`
supporting_reads[supporting_reads=="-"]<-0
supporting_reads=as.numeric(supporting_reads)+full_list$`spanning reads`
#full_list=full_list[order(as.numeric(full_list$`spanning reads`),decreasing=T),]
full_list=full_list[order(supporting_reads,decreasing=T),]
v=split(full_list,full_list$classification)
#then order on classification
full_list=rbind(v[["HighConfidence"]],v[["MediumConfidence"]],v[["LowConfidence"]],v[["PotentialTransSplicing"]],
v[["PotentialReadThrough"]])
write.csv(full_list,paste(out_name,".csv",sep=""),row.names=F,quote=F)
message(paste("Done writing output ",out_name,".csv",sep=""))