-
Notifications
You must be signed in to change notification settings - Fork 1
/
ucsc2pdf
executable file
·178 lines (167 loc) · 8.09 KB
/
ucsc2pdf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/env Rscript
#suppressPackageStartupMessages(library("optparse", lib.loc = "/home/users/sachin/R/x86_64-redhat-linux-gnu-library/2.15"))
suppressPackageStartupMessages(library("optparse"))
## parse command line arguments
option_list <- list(
make_option(c("-n", "--genome"), default="hg19", help="reference genome [default: %default]"),
make_option(c("-c", "--coor"), metavar="coordinate", help="coordinate (chr:start-end)"),
make_option(c("-b", "--bedfile"), metavar="bedfile", help="coordinates in BED format"),
make_option(c("-t", "--trackfile"), help="custom track file"),
make_option(c("-e", "--webDir"), default="~/public_html/ucsc_tracks/", help="web accessible directory to the track file [default: %default]"),
make_option(c("-w", "--webpath"), default="http://rth.dk/~sachin/ucsc_tracks/", help="web path to the track file [default: %default]"),
make_option(c("-m", "--mergePDF"), help="merge multiple tracks in one pdf file", action="store_true"),
make_option(c("-s", "--serverUCSC"), default="http://ucsc.genome.ku.dk", help="hyperlink to UCSC server [default: %default]"),
make_option(c("-x", "--customName"), help="custom name for the output pdf file"),
make_option(c("-o", "--outDir"), default=".", help="output directory [default: %default]")
)
parser <- OptionParser(usage = "%prog [options]", option_list=option_list)
opt <- parse_args(parser)
## check, if all required arguments are given
if((is.null(opt$coor) & is.null(opt$bedfile))) {
cat("\nProgram: ucsc2pdf (auto retrieve UCSC track(s) in pdf format)\n")
cat("Author: University of Copenhagen, Denmark\n")
cat("Version: 1.0\n")
cat("Contact: sachin@rth.dk\n");
print_help(parser)
q()
}
## initialize UCSC server
server <- NULL
if(is.null(opt$serverUCSC)) {
server <- "http://ucsc.genome.ku.dk"
} else { server <- opt$serverUCSC; }
####### lib ############
## CREDITS
## source: http://biostar.stackexchange.com/questions/6149/batch-viewing-of-ucsc-browser-graphic/6155#6155
## source: http://onetipperday.blogspot.dk/2012/07/get-ucsc-images-for-list-of-regions-in.html
## source: http://cran.r-project.org/web/packages/optparse/vignettes/optparse.pdf
## merge script
mergePDF <- function(output="merged.pdf", sourcefiles=c("source1.pdf","source2.pdf","source3.pdf"))
{
## create the command string and call the command using system()
## merging command from http://hints.macworld.com/article.php?story=2003083122212228
command=paste("gs -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite",paste("-sOutputFile",output, sep="="), paste(sourcefiles, collapse=" "),sep=" ")
try(system(command))
}
## Here is an R script wrote by Aaron Statham which saves UCSC to pdfs
## you can choose which genome and tracks to display by altering the 'url' parameter. 'trackfile' is the url of a file describing the custom tracks (beds/bigwigs) to display
screenshotUCSC <- function(url, trackfile, chr, start, end, filename) {
oldpen <- options("scipen")
options(scipen=100)
if(!is.null(opt$webpath)) {
trackfile <- paste(opt$webpath, sub("^.+\\/", "", trackfile, perl=TRUE), sep="/")
}
temp <- readLines(paste(url, "&hgt.customText=", trackfile, "&position=",chr,":",start,"-",end, sep=""))
pdfurl <- paste(server,"/trash",gsub(".*trash","",gsub(".pdf.*","",temp[grep(".pdf", temp, fixed=TRUE)][1])), ".pdf", sep="")
sink("test.txt")
print(paste(url, "&hgt.customText=", trackfile, "&position=",chr,":",start,"-",end, sep=""))
print(pdfurl)
sink()
options(scipen=oldpen)
filename <- paste(opt$outDir, filename, sep="/");
download.file(pdfurl, filename, mode="wb", quiet=TRUE)
}
## Author: Sachin Pundhir
## upload only those custom tracks to UCSC that are overlapping to input coordinate
editTrackFile <- function(trackFile, chr, start, end, tmpFile) {
stop = FALSE
f = file(trackFile, "r")
sink(tmpFile)
track_title <- NULL
while(!stop) {
next_line = readLines(f, n = 1)
chrTrack <- as.vector(as.data.frame(strsplit(next_line, "\t"))[1,1])
startTrack <- as.numeric(as.vector(as.data.frame(strsplit(next_line, "\t"))[2,1]))
endTrack <- as.numeric(as.vector(as.data.frame(strsplit(next_line, "\t"))[3,1]))
if(length(grep("track", next_line))>0) {
track_title=next_line
} else if(length(next_line) == 0) {
stop = TRUE
close(f)
} else if(chrTrack==chr & startTrack>=start & endTrack<=end) {
if(!is.null(track_title)) {
cat(track_title)
cat("\n")
track_title<-NULL
}
cat(next_line)
cat("\n")
}
}
sink()
}
## define the default track
## controlling individual tracks
theURL=paste(server, "/cgi-bin/hgTracks?db=", sep="")
theURL=paste(theURL, opt$genome,sep="")
theURL=paste(theURL,"&wgRna=hide&cpgIslandExt=pack&ensGene=hide&mrna=hide&intronEst=hide&mgcGenes=hide&hgt.psOutput=on&cons46way=hide&snp130=hide&snpArray=hide&wgEncodeReg=hide&pix=1000&refGene=hide&knownGene=hide&rmsk=hide&cpgIslandExt=hide&snp135Common=hide&ruler=dense&cons60way=hide",sep="")
## controling via session
#theURL="http://genome-preview.ucsc.edu/cgi-bin/hgTracks?hgS_doOtherUser=submit&hgS_otherUserName=sterding&hgS_otherUserSessionName=<SESSION_NAME>&hgt.psOutput=on&pix=1000"
## read coordinates
toPlot <- NULL
chr <- NULL
start <- NULL
end <- NULL
if ( !is.null(opt$coor) ) {
toPlot = opt$coor
chr <- as.vector(as.data.frame(strsplit(opt$coor, "[:-]+"))[1,1])
start <- as.numeric(as.vector(as.data.frame(strsplit(opt$coor, "[:-]+"))[2,1]))
end <- as.numeric(as.vector(as.data.frame(strsplit(opt$coor, "[:-]+"))[3,1]))
if(!is.null(opt$trackfile)) {
if(!is.null(opt$customName)) {
tempfile <- paste(opt$webDir, "/", opt$customName, "_", chr, "_", start, "_", end, ".tmp", sep="");
}
else {
tempfile <- paste(opt$webDir, "/", chr, "_", start, "_", end, ".tmp", sep="");
}
editTrackFile(opt$trackfile, chr, start, end, tempfile)
if(!is.null(opt$customName)) {
screenshotUCSC(theURL, tempfile, chr, start-10, end+9, paste(opt$customName, "_", chr, "_", start, "_", end, ".pdf", sep=""))
}
else {
screenshotUCSC(theURL, tempfile, chr, start-10, end+9, paste(chr, "_", start, "_", end, ".pdf", sep=""))
}
} else {
if(!is.null(opt$customName)) {
screenshotUCSC(theURL, "", chr, start-10, end+9, paste(opt$customName, "_", chr, "_", start, "_", end, ".pdf", sep=""))
}
else {
screenshotUCSC(theURL, "", chr, start-10, end+9, paste(chr, "_", start, "_", end, ".pdf", sep=""))
}
}
outfile <- paste(opt$coor, ".pdf", sep="")
outfile <- gsub("[:-]+", "_", outfile, perl=TRUE)
} else if ( !is.null(opt$bedfile) ) {
toPlot=read.table(opt$bedfile)
## anti-robot version
## UCSC Policy: Program-driven use of this software is limited to a maximum of one hit every 15 seconds and no more than 5,000 hits per day.
for(i in 1:nrow(toPlot)){
chr <- as.character(toPlot[i,1])
start <- toPlot[i,2]-10
end <- toPlot[i,3]+9
if(!is.null(opt$trackfile)) {
if(!is.null(opt$customName)) {
tempfile <- paste(opt$webDir, "/", opt$customName, "_", chr, "_", start, "_", end, ".tmp", sep="");
}
else {
tempfile <- paste(opt$webDir, "/", chr, "_", start, "_", end, ".tmp", sep="");
}
editTrackFile(opt$trackfile, chr, start, end, tempfile)
screenshotUCSC(theURL, tempfile, chr, start, end, paste(opt$customName, "region_", i, "_", chr, "_", start, "_", end, ".pdf", sep=""))
} else {
screenshotUCSC(theURL, "", chr, start, end, paste(opt$customName, "region_", i, "_", chr, "_", start, "_", end, ".pdf", sep=""))
}
Sys.sleep(5)
}
outfile <- paste(opt$bedfile, ".pdf", sep="")
outfile <- gsub("[:-]+", "_", outfile, perl=TRUE)
}
if(!is.null(opt$mergePDF)) {
mergePDF(outfile, list.files(pattern="region_.*.pdf"))
try(system("rm region_*.pdf"))
}
q()
## parallel version
#library(multicore)
#mclapply(1:nrow(toPlot), function(i) screenshotUCSC(theURL, "", as.character(toPlot$chr[i]), toPlot$start[i]-2000, toPlot$end[i]+1999, paste("region_", i, "_", toPlot$name[i],".pdf", sep="")), mc.cores=10)
#"URL_of_your_custom_track", toPlot$space[i], toPlot$start[i]-3000, toPlot$start[i]+2999, paste("Figures/Shots/Low_To_High_", i, ".pdf", sep="")), mc.cores=10)