-
Notifications
You must be signed in to change notification settings - Fork 1
/
experiments.r
126 lines (108 loc) · 4.17 KB
/
experiments.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# The experiments are called from this script.
# It loads the data, calls the Gibbs sampler,
# and saves the traces in files for further analysis
# and evaluation of thread length predictions in the test set
# author: Alberto Lumbreras
#######################################################
# Run one experiment: load a dataset, call the Gibbs sampler on a random
# subset of the training threads and write every returned trace to a file.
#
# Arguments:
#   nthreads.train  number of threads drawn at random (without replacement)
#                   from the training files
#   dataset         name of the sub-directory of ./data/ holding the inputs
#   model           one of "DP", "fixed" or "single"; selects how gibbs()
#                   is called (see the dispatch below)
#   nsamples        forwarded to gibbs() as `iters`
#   K               number of k-means clusters used to initialise the
#                   "fixed" and "single" models
#
# Side effects:
#   - sources 'gibbs_dual.r' (expected to define gibbs())
#   - creates out/<dataset>/<model>_threads_<nthreads.train>_<nsamples>-<n>/traces/
#     where <n> is the first positive integer whose directory does not exist,
#     and writes one '<name>.trc' file per element of the gibbs() result.
#
# Returns NULL invisibly.
experiment <- function(nthreads.train, dataset, model, nsamples, K=5){
  # Fail early on an unknown model, before any data is loaded or any
  # output directory is created.
  if (length(model) != 1 || !(model %in% c("DP", "fixed", "single"))) {
    stop("unknown model '", paste(model, collapse = ", "),
         "'; expected 'DP', 'fixed' or 'single'", call. = FALSE)
  }

  source('gibbs_dual.r')

  # Load data
  ######################################
  data.dir <- paste0('./data/', dataset, '/')
  df <- read.table(paste0(data.dir, 'data_users_50.csv'), sep='\t', header=TRUE)

  # The iris dataset uses columns 2-4 as attributes, the others columns 2-3.
  if (dataset == 'iris') {
    A <- t(df[, 2:4])
  } else {
    A <- t(df[, 2:3])
  }
  B <- t(df$b)

  y <- read.table(paste0(data.dir, 'train_lengths_50.csv'), sep='\t', header=TRUE)
  P <- read.table(paste0(data.dir, 'train_participations_50.csv'), sep='\t', header=TRUE)
  nthreads <- dim(P)[2]

  # Select a random subset of threads for training the model
  idx.train <- sample(nthreads, nthreads.train)
  y <- y[idx.train, ]
  P <- P[, idx.train]

  # Initialization of cluster assignments and run
  ###############################################
  # Alternative initialisations that were tried (kept for reference):
  #   z_init <- df$z + 1                               # labels stored in the data file
  #   z_init <- rep(1, dim(B)[2])                      # one cluster; not recommended
  #   z_init <- sample(10, dim(B)[2], replace = TRUE)  # random; not great either
  if (model == "DP") {
    # A la k-means (more clever)
    z_init <- kmeans(t(A), 10)$cluster
    # Guarantee labels 1..k with no gaps. match() keeps every observation in
    # the cluster k-means gave it; rep(seq(tb), tb) would instead emit the
    # labels in sorted order and discard the per-observation assignment.
    z_init <- match(z_init, sort(unique(z_init)))
    res <- gibbs(A, B, P, y, z_init=z_init, iters=nsamples, DP=TRUE, views='both')
  } else if (model == "fixed") {
    z_init <- kmeans(t(A), K)$cluster
    res <- gibbs(A, B, P, y, z_init=z_init, iters=nsamples, DP=FALSE, views='both')
  } else {
    # model == "single"
    # Not implemented yet.
    # In the paper, z=1 for all and z is not sampled (1 cluster).
    # We should try with CRP in the regression model
    # but with the likelihood computed only with coefficients:
    # sample_z and pass behavior view params.
    z_init <- kmeans(t(A), K)$cluster
    res <- gibbs(A, B, P, y, z_init=z_init, iters=nsamples, DP=TRUE, views='behaviors')
  }

  # Save traces to files
  #######################
  # Do not overwrite old experiments: append the first free run number.
  experiment.path <- file.path("out", dataset, paste0(model, "_threads_", nthreads.train, "_", nsamples))
  run <- 1
  while (dir.exists(paste0(experiment.path, '-', run))) {
    run <- run + 1
  }
  experiment.path <- paste0(experiment.path, '-', run)

  traces.path <- file.path(experiment.path, "traces")
  dir.create(traces.path, recursive=TRUE)

  # seq_along() is safe when res is empty (1:length(res) would yield 1, 0).
  for (i in seq_along(res)) {
    tr.name <- paste0(names(res)[i], '.trc')
    # Namespace-qualified so the function also works when MASS is not attached.
    MASS::write.matrix(as.data.frame(res[[i]]), file=file.path(traces.path, tr.name))
  }
  invisible(NULL)
}
library(doParallel)
library(foreach)
library(parallel)
# Driver: run experiment() in parallel, one task per element of i.seq.
#
# NOTE: this section contained unresolved merge-conflict markers, which made
# the file unparseable. It is resolved here in favour of the HEAD side; the
# settings of the other side are kept as commented-out alternatives.

# Choose one of the datasets
# dataset <- 'confused_features'
# dataset <- 'clear'
# dataset <- 'disagreement'
# dataset <- 'agreement'
dataset <- 'iris'

nsamples <- 20000

# Number of training threads used by each parallel task.
#i.seq <- rep(seq(10,100, by=10), 3)
#i.seq <- rep(seq(20,100, by=10), 1)
i.seq <- rep(c(40), 5)

# Leave two cores free, but always start at least one worker
# (detectCores() may return NA or a value <= 2).
ncores <- max(1L, detectCores() - 2L, na.rm = TRUE)
cl <- makeCluster(ncores, outfile="", port=11439)
registerDoParallel(cl)

# Packages attached on every worker before experiment() is called.
pck <- c('abind', 'MASS', 'mvtnorm', 'mixtools', 'coda', 'ars')

# Always shut the workers down, even when one of the runs fails.
tryCatch({
  foreach(i=i.seq, .packages = pck) %dopar% experiment(i, dataset, 'single', nsamples, K=5)
  #foreach(i=i.seq, .packages = pck) %dopar% experiment(i, dataset, 'DP', nsamples, K=5)
  #foreach(i=i.seq, .packages = pck) %dopar% experiment(i, dataset, 'fixed', nsamples, K=5)
}, finally = stopCluster(cl))