Skip to content

Commit

Permalink
add the possibility to run some steps in parallel and some not
Browse files Browse the repository at this point in the history
  • Loading branch information
robingenuer committed Jul 16, 2019
1 parent aa0d928 commit 7c8d124
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 37 deletions.
31 changes: 17 additions & 14 deletions R/VSURF.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@
#' passed to each intermediate function: \code{VSURF_thres},
#' \code{VSURF_interp}, \code{VSURF_pred}, in this order.
#' @param parallel A logical indicating if you want VSURF to run in parallel on
#' multiple cores (default to FALSE).
#' multiple cores (default to FALSE). If a vector of length 3 is given,
#' each element is passed to the corresponding intermediate function:
#' \code{VSURF_thres}, \code{VSURF_interp}, \code{VSURF_pred}, in this order.
#' @param ncores Number of cores to use. Default is set to the number of cores
#' detected by R minus 1.
#' @param clusterType Type of the multiple cores cluster used to run VSURF in
Expand All @@ -97,6 +99,7 @@
#' \code{VSURF_interp}, in this order.
#' @param verbose A logical indicating if information about method's progress
#' (including progress bars for each step) must be printed (default to TRUE).
#' Adds a small extra overhead.
#' @param ... other parameters to be passed on to the \code{randomForest}
#' function (see ?randomForest for further information).
#'
Expand Down Expand Up @@ -215,39 +218,39 @@ VSURF <- function (x, ...) {
VSURF.default <- function(
x, y, ntree = 2000, mtry = max(floor(ncol(x)/3), 1),
nfor.thres = 50, nmin = 1, nfor.interp = 25, nsd = 1, nfor.pred = 25, nmj = 1,
RFimplem = "randomForest", parallel = FALSE,
ncores = detectCores() - 1, clusterType = "PSOCK", verbose = TRUE, ...) {
RFimplem = "randomForest", parallel = FALSE, ncores = detectCores() - 1,
clusterType = "PSOCK", verbose = TRUE, ...) {

start <- Sys.time()

if (!parallel) {
clusterType <- NULL
ncores <- NULL
}

thres <- VSURF_thres(
x=x, y=y, ntree=ntree, mtry=mtry, nfor.thres=nfor.thres, nmin=nmin,
RFimplem = ifelse(length(RFimplem) == 3, RFimplem[1], RFimplem),
parallel=parallel,
clusterType = ifelse(length(clusterType) == 2, clusterType[1], clusterType),
parallel = ifelse(length(parallel) == 3, parallel[1], parallel),
clusterType = ifelse(length(clusterType) > 1, clusterType[1], clusterType),
ncores=ncores, verbose = verbose, ...)

interp <- VSURF_interp(
x=x, y=y, ntree=ntree, vars=thres$varselect.thres, nfor.interp=nfor.interp,
nsd=nsd, RFimplem = ifelse(length(RFimplem) == 3, RFimplem[2], RFimplem),
parallel=parallel,
clusterType = ifelse(length(clusterType) == 2, clusterType[2], clusterType),
parallel = ifelse(length(parallel) == 3, parallel[2], parallel),
clusterType = ifelse(length(clusterType) > 1, clusterType[2], clusterType),
ncores=ncores, verbose = verbose, ...)

pred <- VSURF_pred(x=x, y=y, ntree=ntree, err.interp=interp$err.interp,
varselect.interp=interp$varselect.interp, nfor.pred=nfor.pred, nmj=nmj,
RFimplem = ifelse(length(RFimplem) == 3, RFimplem[3], RFimplem),
verbose = verbose,...)
parallel = ifelse(length(parallel) == 3, parallel[3], parallel),
ncores = ncores, verbose = verbose, ...)

cl <- match.call()
cl[[1]] <- as.name("VSURF")

overall.time <- Sys.time()-start
if (identical(parallel, FALSE) | identical(parallel, rep(FALSE, 3))) {
clusterType <- NULL
ncores <- NULL
}
overall.time <- Sys.time() - start

output <- list('varselect.thres'=thres$varselect.thres,
'varselect.interp'=interp$varselect.interp,
Expand Down
7 changes: 5 additions & 2 deletions R/VSURF_interp.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ VSURF_interp.default <- function(
if (verbose == TRUE) cat(paste("\nInterpretation step (on", length(vars), "variables)\n"))

if (!parallel) {
clusterType <- NULL
ncores <- NULL
}

Expand Down Expand Up @@ -247,7 +246,7 @@ VSURF_interp.default <- function(
}

# initialization of the progress bar
if (verbose == TRUE & parallel == FALSE) {
if (verbose == TRUE & (parallel == FALSE | clusterType %in% c("ranger", "Rborist"))) {
pb <- utils::txtProgressBar(style = 3)
nBar <- 1
}
Expand Down Expand Up @@ -301,6 +300,7 @@ VSURF_interp.default <- function(
}
} else {
if (clusterType == "ranger") {
if (RFimplem != "ranger") stop("RFimplem must be set to 'ranger' to use clusterType 'ranger'")
for (i in 1:nvars){
res <- rf.interp.ranger(i, nfor.interp, num.threads = ncores, ...)
err.interp[i] <- res[1]
Expand All @@ -312,6 +312,7 @@ VSURF_interp.default <- function(
}
} else {
if (clusterType == "Rborist") {
if (RFimplem != "Rborist") stop("RFimplem must be set to 'Rborist' to use clusterType 'Rborist'")
for (i in 1:nvars){
res <- rf.interp.Rborist(i, nfor.interp, nThread = ncores, ...)
err.interp[i] <- res[1]
Expand Down Expand Up @@ -396,6 +397,8 @@ VSURF_interp.default <- function(
cl <- match.call()
cl[[1]] <- as.name("VSURF_interp")

if (!parallel) clusterType <- NULL

comput.time <- Sys.time()-start

output <- list('varselect.interp'=varselect,
Expand Down
26 changes: 15 additions & 11 deletions R/VSURF_pred.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ VSURF_pred <- function (x, ...) {
#' @rdname VSURF_pred
#' @export
VSURF_pred.default <-function(x, y, ntree = 2000, err.interp, varselect.interp,
nfor.pred = 25, nmj = 1, RFimplem = "randomForest", verbose = TRUE, ...) {
nfor.pred = 25, nmj = 1, RFimplem = "randomForest", parallel = FALSE,
ncores = detectCores()-1, verbose = TRUE, ...) {

# err.interp: interpretation models errors
# varselect.interp: interpretation variables indices
Expand Down Expand Up @@ -197,15 +198,16 @@ did not eliminate variables")
if (RFimplem == "ranger") {
dat <- cbind(w, "y" = y)
for (j in 1:nfor.pred) {
rf[j] <- ranger::ranger(dependent.variable.name="y", data=dat,
num.trees=ntree,
...)$prediction.error
rf[j] <- ranger::ranger(dependent.variable.name="y", data=dat,
num.threads = ifelse(parallel, ncores, 1),
num.trees=ntree, ...)$prediction.error
}
err.pred <- mean(rf)
}
if (RFimplem == "Rborist") {
for (j in 1:nfor.pred) {
rf[j] <- Rborist::Rborist(x = w, y = y, nTree = ntree, minInfo = 0,
nThread = ifelse(parallel, ncores, 1),
...)$validation$oobError
}
err.pred <- mean(rf)
Expand Down Expand Up @@ -251,28 +253,30 @@ did not eliminate variables")
if (i <= n) {
for (j in 1:nfor.pred) {
rf[j] <- ranger::ranger(dependent.variable.name="y", data=dat,
num.threads = ifelse(parallel, ncores, 1),
num.trees=ntree, ...)$prediction.error
}
} else {
for (j in 1:nfor.pred) {
rf[j] <- ranger::ranger(dependent.variable.name="y", data=dat,
mtry=i/3, num.trees=ntree,
...)$prediction.error
num.threads = ifelse(parallel, ncores, 1),
num.trees=ntree, mtry=i/3, ...)$prediction.error
}
}
z <- mean(rf)
}
if (RFimplem == "Rborist") {
if (i <= n) {
for (j in 1:nfor.pred) {
rf[j] <- Rborist::Rborist(x = w, y = y, nTree = ntree,
minInfo = 0, ...)$validation$oobError
rf[j] <- Rborist::Rborist(x = w, y = y, nTree = ntree, minInfo = 0,
nThread = ifelse(parallel, ncores, 1),
...)$validation$oobError
}
} else {
for (j in 1:nfor.pred) {
rf[j] <- Rborist::Rborist(x = w, y = y, nTree = ntree,
minInfo = 0, predFixed = i/3,
...)$validation$oobError
rf[j] <- Rborist::Rborist(x = w, y = y, nTree = ntree, minInfo = 0,
nThread = ifelse(parallel, ncores, 1),
predFixed = i/3, ...)$validation$oobError
}
}
z <- mean(rf)
Expand Down
7 changes: 5 additions & 2 deletions R/VSURF_thres.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ VSURF_thres.default <- function(
if (verbose == TRUE) cat(paste("Thresholding step\n"))

if (!parallel) {
clusterType <- NULL
ncores <- NULL
} else {
ncores <- min(nfor.thres, ncores)
Expand Down Expand Up @@ -162,6 +161,7 @@ VSURF_thres.default <- function(
# filling of perf with the nfor.thres forests OOB errors

if (RFimplem == "Rborist") RFimplem <- "randomForest"
if (clusterType == "Rborist") clusterType <- "PSOCK"

if (RFimplem == "randomForest") {
rf.classif <- function(i, ...) {
Expand Down Expand Up @@ -208,7 +208,7 @@ VSURF_thres.default <- function(
}

# initialization of the progress bar
if (verbose == TRUE & parallel == FALSE) {
if (verbose == TRUE & (parallel == FALSE | clusterType == "ranger")) {
pb <- utils::txtProgressBar(style = 3)
nBar <- 1
}
Expand Down Expand Up @@ -251,6 +251,7 @@ VSURF_thres.default <- function(
}
} else {
if (clusterType == "ranger") {
if (RFimplem != "ranger") stop("RFimplem must be set to 'ranger' to use clusterType 'ranger'")
for (i in 1:nfor.thres) {
rf <- rf.ranger(i, num.threads = ncores, ...)
m[i,] <- rf$m
Expand Down Expand Up @@ -371,6 +372,8 @@ VSURF_thres.default <- function(
cl <- match.call()
cl[[1]] <- as.name("VSURF_thres")

if (!parallel) clusterType <- NULL

comput.time <- Sys.time()-start

output <- list('varselect.thres'=varselect.thres,
Expand Down
7 changes: 5 additions & 2 deletions man/VSURF.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/VSURF_interp.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 12 additions & 2 deletions man/VSURF_pred.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/VSURF_thres.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 7c8d124

Please sign in to comment.