Merge pull request #4 from edwindj/master

Adding a formula interface with `elm`
mlampros · Oct 15, 2021 · f44f415 · f44f415
2 parents 81b23cd + 69f8362
commit f44f415
Show file tree

Hide file tree

Showing 10 changed files with 357 additions and 1 deletion.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -4,3 +4,5 @@
 ^\.ccache$
 ^\.github$
 ^tic\.R$
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
 docs/
+.Rproj.user
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,4 +17,4 @@ Suggests: testthat,
     knitr,
     rmarkdown
 VignetteBuilder: knitr
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(fitted,elm)
+S3method(predict,elm)
+S3method(print,elm)
+S3method(residuals,elm)
+export(elm)
 export(elm_predict)
 export(elm_train)
 export(onehot_encode)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,6 @@
+## elmNNRcpp 1.0.3.9000
+
+* Added formula interface through function `elm`.
 
 ## elmNNRcpp 1.0.3
 

diff --git a/R/elm.R b/R/elm.R
@@ -0,0 +1,181 @@
+#' Fit an extreme learning model
+#'
+#' Formula interface for \code{\link{elm_train}}, transforms a data frame and formula
+#' into the necessary input for elm_train, automatically calls \code{\link{onehot_encode}}
+#' for classification.
+#'
+#' @examples
+#' elm(Species ~ ., data = iris, nhid = 20, actfun="sig")
+#' 
+#' mod_elm <- elm(Species ~ ., data = iris, nhid = 20, actfun="sig")
+#' 
+#' # predict classes
+#' predict(mod_elm, newdata = iris[1:3,-5])
+#' 
+#' # predict probabilities
+#' predict(mod_elm, newdata = iris[1:3,-5], type="prob")
+#'
+#' # predict elm output
+#' predict(mod_elm, newdata = iris[1:3,-5], type="raw")
+#' 
+#' data("Boston")
+#' elm(medv ~ ., data = Boston, nhid = 40, actfun="relu")
+#'
+#' data("ionosphere")
+#' elm(class ~ ., data = ionosphere, nhid=20, actfun="relu")
+#'
+#' @export
+#' @inheritParams elm_train
+#' @param formula formula used to specify the regression or classification.
+#' @param data data.frame with the data
+#' @return elm object which can be used with predict, residuals and fitted.
+elm <- function(formula, data, nhid, actfun, init_weights = "normal_gaussian", bias = FALSE, moorep_pseudoinv_tol = 0.01,
+                leaky_relu_alpha = 0.0, seed = 1, verbose = FALSE){
+  data <- as.data.frame(data)
+  mf <- stats::model.frame(formula, data = data)
+  mm <- stats::model.matrix(formula, mf)
+
+  y <- mf[[1]]
+
+  # TODO fix the categorical predictors...
+  x <- mm[,-1, drop=FALSE]
+
+  # regression or classification?
+  is_regression <- is.numeric(y)
+  is_logical <- is.logical(y)
+
+  if (is_regression){
+    y <- matrix(y, ncol=1)
+  } else if (is_logical){
+    y <- matrix(as.integer(y), ncol=1)
+    levs <- NULL
+  } else {
+    # TODO logical
+    yf <- as.factor(y)
+
+    levs <- levels(yf)
+    y <- onehot_encode(as.integer(yf) - 1)
+    colnames(y) <- levs
+  }
+
+  fit <- elm_train( x = x
+                  , y = y
+                  , nhid = nhid
+                  , actfun = actfun
+                  , init_weights = init_weights
+                  , bias =bias
+                  , moorep_pseudoinv_tol = moorep_pseudoinv_tol
+                  , leaky_relu_alpha = leaky_relu_alpha
+                  , seed = seed
+                  , verbose = verbose
+                  )
+
+  class(fit) <- "elm"
+
+  fit$formula <- stats::terms(mf)
+  fit$call <- sys.call()
+  fit$nhid <- nhid
+  fit$actfun <- actfun
+  fit$is_regression <- is_regression
+  fit$is_logical <- is_logical
+
+  colnames(fit$inpweight) <- colnames(x)
+
+  if (is_regression){
+    #dim(fit$outweight) <- NULL
+    dim(fit$predictions) <- NULL
+    dim(fit$fitted_values) <- NULL
+    dim(fit$residuals) <- NULL
+  } else if (is_logical){
+    dim(fit$predictions) <- NULL
+    dim(fit$fitted_values) <- NULL
+    dim(fit$residuals) <- NULL
+    fit$pred_class <- fit$predictions >= 0.5
+    fit$y <- mf[[1]]
+  } else {
+    colnames(fit$outweight) <- levs
+    colnames(fit$predictions) <- levs
+    colnames(fit$fitted_values) <- levs
+    colnames(fit$residuals) <- levs
+    fit$pred_class <- levs[apply(fit$predictions, 1, which.max)]
+    fit$pred_class <- factor(fit$pred_class, levels=levs)
+    fit$y <- yf
+  }
+
+  fit
+}
+
+
+#' @export
+print.elm <- function(x,...){
+  cat("Extreme learning model, elm")
+  if (x$is_regression){
+    cat(" (regression)")
+  } else {
+    cat(" (classification)")
+  }
+  cat(":\n\n")
+  cat("call: ", deparse(x$call), "\n")
+  cat("hidden units       :", x$nhid, "\n")
+  cat("activation function:", x$actfun, "\n")
+
+  if (x$is_regression){
+    cat("mse                :", mean(x$residuals^2), "\n")
+  } else {
+    cat("accuracy           :", mean(x$y == x$pred_class), "\n")
+    cat("\nconfusion matrix :\n")
+    print(table(observed=x$y, predicted = x$pred_class))
+  }
+}
+
+#' @export
+residuals.elm <- function(object, ...){
+  object$residuals
+}
+
+#' @export
+fitted.elm <- function(object, ...){
+  object$fitted_values
+}
+
+#' Predict with elm
+#' 
+#' Wrapper for \code{\link{elm_predict}}.
+#' @export
+#' @param object elm model fitted with \code{\link{elm}}.
+#' @param newdata data.frame with the new data
+#' @param type only used with classification, can be either "class", "prob", "raw", 
+#' which are class (vector), probability (matrix) or the output of the elm function (matrix).
+#' @param ... not used
+#' @return predicted values
+predict.elm <- function(object, newdata, type=c("class", "prob", "raw"), ...){
+  type <- match.arg(type)
+  if (object$is_regression && type != "class"){
+    warning("type is ignored for regression", call. = FALSE)
+  }
+  if (missing(newdata)){
+    predictions <- object$predictions
+  } else {
+    f <- object$formula
+
+    y_name <- as.character(f[[2]])
+    newdata[y_name] <- 1 # just a value, not used
+
+    mf <- stats::model.frame(object$formula, data = newdata)
+    mm <- stats::model.matrix(f, mf)
+    x <- mm[,-1, drop=FALSE]
+
+    predictions <- elm_predict(unclass(object), newdata = x, normalize = (type=="prob"))
+    colnames(predictions) <- colnames(object$predictions)
+  }
+  if (type == "class"){
+    if (object$is_logical){
+      predictions <- predictions >= 0.5
+    } else if (!object$is_regression){
+      levs <- colnames(object$predictions)
+      pred_class <- levs[apply(predictions, 1, which.max)]
+      predictions <- factor(pred_class, levels=levs)
+    }
+  }
+  drop(predictions)
+}
diff --git a/man/elm.Rd b/man/elm.Rd
diff --git a/man/predict.elm.Rd b/man/predict.elm.Rd
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -6,6 +6,11 @@
 
 using namespace Rcpp;
 
+#ifdef RCPP_USE_GLOBAL_ROSTREAM
+Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
+Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
+#endif
+
 // satlins
 arma::mat satlins(arma::mat x);
 RcppExport SEXP _elmNNRcpp_satlins(SEXP xSEXP) {

diff --git a/tests/testthat/test-elm.R b/tests/testthat/test-elm.R
@@ -0,0 +1,66 @@
+test_that("elm regression works", {
+  boston_train <- KernelKnn::Boston[1:350,]
+  boston_test <- KernelKnn::Boston[-c(1:350),]
+
+  model_train <- elm_train(xtr, ytr, nhid=100, actfun = "sig")
+  model <- elm(medv ~ ., data = boston_train, nhid=100, actfun="sig")
+
+  expect_equal(class(model), "elm")
+  expect_true(model$is_regression)
+
+  pred <- predict(model, newdata=boston_train)
+  expect_equal(pred, model$predictions)
+  expect_equal(residuals(model), model$residuals)
+  expect_equal(fitted(model), model$fitted_values)
+
+  expect_equal(model$outweight, model_train$outweight)
+  expect_equal(model$predictions, drop(model_train$predictions))
+  expect_equal(model$residuals, drop(model_train$residuals))
+})
+
+test_that("elm classification works", {
+  ionosphere_train <- KernelKnn::ionosphere[1:200, -2]
+  ionosphere_test <- KernelKnn::ionosphere[-c(1:200), -2]
+
+  model_train <- elm_train(xtr_class, ytr_class, nhid=20, actfun = "relu")
+  model <- elm(class ~ ., data = ionosphere_train, nhid=20, actfun="relu")
+
+  expect_equal(class(model), "elm")
+  expect_false(model$is_regression)
+
+  pred <- predict(model, newdata=ionosphere_train, type="raw")
+  pred_train <- elm_predict(model_train, newdata = xtr_class, normalize = FALSE)
+  expect_equal(pred, model$predictions)
+  expect_equal(unname(pred), model_train$predictions)
+  expect_equal(unname(pred), pred_train)
+
+  pred <- predict(model, newdata=ionosphere_train, type="prob")
+  pred_train <- elm_predict(model_train, newdata = xtr_class, normalize = TRUE)
+  expect_equal(unname(pred), pred_train)
+
+  pred <- predict(model, newdata = ionosphere_train, type="class")
+  expect_equal(pred, model$pred_class)
+
+  expect_equal(residuals(model), model$residuals)
+  expect_equal(fitted(model), model$fitted_values)
+
+  expect_equal(unname(model$outweight), model_train$outweight)
+  expect_equal(unname(model$predictions), drop(model_train$predictions))
+  expect_equal(unname(model$residuals), drop(model_train$residuals))
+
+  expect_equal(colnames(model$outweight), levels(ionosphere$class))
+  expect_equal(colnames(model$predictions), levels(ionosphere$class))
+  expect_equal(colnames(model$residuals), levels(ionosphere$class))
+})
+
+test_that("elm works with binary classification", {
+  data <- data.frame(y = c(TRUE, FALSE), x = 1:2)
+  model <- elm(y ~ ., data = data, nhid=1, actfun = "sig")
+  expect_equal(model$pred_class, data$y)
+
+  pred <- predict(model, newdata = data)
+  expect_equal(pred, model$pred_class)
+
+  pred <- predict(model, newdata = data, type="raw")
+  expect_equal(pred, model$predictions)
+})