forked from aidenlab/EigenVector
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eigenVectorRscript.R
74 lines (69 loc) · 3.06 KB
/
eigenVectorRscript.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
library(optparse,quietly=TRUE)
library(Matrix,quietly=TRUE)
source("bestEigen3.R")
option_list <- list(
make_option(c("-t","--tolerance"), help = "precision", type="double", default=1.0e-6),
make_option(c("-m","--maxiter"), help = "maximum iterations", type="integer",default=NA),
make_option(c("-s","--size"),help = "chromosome length (in basepairs)", type="integer",default=NA),
make_option(c("-v","--verbose"),help = "verbose", type="logical",default=FALSE)
)
parser <- OptionParser(
usage = paste("Rscript --vanilla %prog [OPTIONS] inFfile outFile resolution (basepairs)",
"computes the principal eigenvector of the correlation matrix of HiC contacts matrix",
"",
"inFile = file containing the contacts for the chromosome (as produced by dump)",
"outFile = file to output the eigenvector; outFile.report will also be produced to report run summary",
"resolution = resolution in basepairs", sep="\n"),
epilogue = "inFile, outFile and resolution are required.\n",
option_list=option_list)
arguments=NA
tryCatch(
{ arguments = parse_args(parser, positional_arguments=TRUE);},
error = function(e) { })
if (all(is.na(arguments))) {
stop (paste("Failed to parse command-line parameters",
"Use --help for help"))
}
opts = arguments$options
if (length(arguments$args) < 3) stop("Need inFile, outFile and resolution\nUse --help for help")
inFile = arguments$args[1]
outFile = arguments$args[2]
binsize = as.numeric(arguments$args[3])
verbose <- opts$verbose
tol <- as.numeric(opts$tolerance)
maxiter <- 100
if (!is.na(opts$maxiter)) maxiter <- as.numeric(opts$maxiter)
t1 <- system.time(y <- scan(inFile,what=list(s=integer(),e=integer(),v=double()),quiet=TRUE))
t1 <- t1["elapsed"]
k <- length(y$s)
if (verbose) print(paste("took",t1,"seconds to read",k,"records"),digits=6, quote=FALSE)
y$v[is.na(y$v)] <- 0
i <- y$s/binsize+1
j <- y$e/binsize+1
n <- max(j)
if (!is.na(opts$size)) n <- ceiling(as.numeric(opts$size)/binsize)
t2 <- system.time(x <- sparseMatrix(i=i,j=j,x=y$v,dims=c(n,n),symmetric=TRUE))
t2 <- t2["elapsed"]
if (verbose) print(paste("took",t2,"seconds to build sparse matrix"),digits=6, quote=FALSE)
rm(y)
rm(i)
rm(j)
for (i in 1:2) gc(verbose=FALSE)
t3 <- system.time(res <- bestEigen(x,maxiter=maxiter,tol=tol))
t3 <- t3["elapsed"]
if (verbose) {
print(paste("took",t1,"seconds to read",k,"records"),digits=6, quote=FALSE)
print(paste("took",t2,"seconds to build sparse matrix"),digits=6, quote=FALSE)
print(paste("eigenvector computation took",t3,"seconds"),digits=6, quote=FALSE)
print(paste("lam1 =",res$lam1,",er =",res$er,",lam2 =",res$lam2," niter=",res$niter),digits=6, quote=FALSE)
}
write.table(res$v,outFile,quote=FALSE,row.names=FALSE,col.names=FALSE)
writeLines(
c(
paste("took",t1,"seconds to read",k,"records"),
paste("took",t2,"seconds to build sparse matrix"),
paste("eigenvector computation took",t3,"seconds"),
paste("lam1 =",res$lam1,",er =",res$er,",lam2 =",res$lam2," niter=",res$niter)
),
paste0(outFile,".report")
)