-
Notifications
You must be signed in to change notification settings - Fork 1
/
matrix2clustering.R
executable file
·58 lines (48 loc) · 2.13 KB
/
matrix2clustering.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library("optparse"))
## parse command line arguments
option_list <- list(
make_option(c("-i", "--matrixFile"), help="input count matrix file (can be stdin)"),
make_option(c("-o", "--pdfFile"), help="output pdf file"),
make_option(c("-s", "--sessionFile"), help="output session file"),
make_option(c("-n", "--clusterCount"), help="number of clusters")
)
parser <- OptionParser(usage = "%prog [options]", option_list=option_list)
opt <- parse_args(parser)
## check, if all required arguments are given
if(is.null(opt$matrixFile) | is.null(opt$pdfFile) | is.null(opt$sessionFile) | is.null(opt$clusterCount)) {
cat("\nProgram: matrix2clustering.R (R script to perform clustering on input count matrix)\n")
cat("Author: BRIC, University of Copenhagen, Denmark\n")
cat("Version: 1.0\n")
cat("Contact: pundhir@binf.ku.dk\n");
print_help(parser)
q()
}
## load libraries
suppressPackageStartupMessages(library(session))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(gplots))
suppressPackageStartupMessages(library(dtw))
if(identical(opt$matrixFile, "stdin")==T) {
data <- read.table(file("stdin"))
} else {
data <- read.table(opt$matrixFile)
}
mat <- as.matrix(data)
set.seed(1)
kc_script <- kmeans(as.dist(1-cor(t(mat))), as.numeric(opt$clusterCount))
#kc_script <- kmeans(mat, as.numeric(opt$clusterCount))
#kc_script <- kmeans(proxy::dist(mat, method="DTW"), as.numeric(opt$clusterCount))
mydata <- mat
wss <- (nrow(mydata)-1)*sum(apply(mydata,2,var))
for (i in 2:15) wss[i] <- sum(kmeans(mydata, centers=i)$withinss)
df <- as.data.frame(mat)
df$clusters <- kc_script$cluster
df <- df[order(df$clusters),]
pdf(opt$pdfFile)
mat <- as.matrix(df[,c(1:ncol(df)-1)])
#heatmap.2(mat, dendrogram = "none", Rowv = FALSE, Colv = FALSE, trace = "none", col=bluered(10), labRow=NA, scale="row", na.rm=T)
heatmap(apply(mat, 2, rev), Colv=NA, Rowv=NA, scale="row", col=colorRampPalette(c("dodgerblue4","grey97", "sienna3"))(250), margins=c(5,10))
plot(1:15, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")
dev.off()
save.session(opt$sessionFile)