dmlc · hcho3 · Sep 12, 2020 · Sep 10, 2020 · Sep 10, 2020 · Sep 11, 2020
diff --git a/Makefile b/Makefile
@@ -138,12 +138,14 @@ Rpack: clean_all
 	rm xgboost/remove_warning_suppression_pragma.sh
 	rm -rfv xgboost/tests/helper_scripts/
 
+R ?= R
+
 Rbuild: Rpack
-	R CMD build --no-build-vignettes xgboost
+	$(R) CMD build --no-build-vignettes xgboost
 	rm -rf xgboost
 
 Rcheck: Rbuild
-	R CMD check --as-cran xgboost*.tar.gz
+	$(R) CMD check --as-cran xgboost*.tar.gz
 
 -include build/*.d
 -include build/*/*.d
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
@@ -63,6 +63,5 @@ Imports:
     methods,
     data.table (>= 1.9.6),
     magrittr (>= 1.5),
-    stringi (>= 0.5.2)
 RoxygenNote: 7.1.1
 SystemRequirements: GNU make, C++14
diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
@@ -81,11 +81,6 @@ importFrom(graphics,title)
 importFrom(magrittr,"%>%")
 importFrom(stats,median)
 importFrom(stats,predict)
-importFrom(stringi,stri_detect_regex)
-importFrom(stringi,stri_match_first_regex)
-importFrom(stringi,stri_replace_all_regex)
-importFrom(stringi,stri_replace_first_regex)
-importFrom(stringi,stri_split_regex)
 importFrom(utils,head)
 importFrom(utils,object.size)
 importFrom(utils,str)

diff --git a/R-package/R/utils.R b/R-package/R/utils.R
@@ -167,9 +167,8 @@ xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
   evnames <- names(watchlist)
   if (is.null(feval)) {
     msg <- .Call(XGBoosterEvalOneIter_R, booster_handle, as.integer(iter), watchlist, as.list(evnames))
-    msg <- stri_split_regex(msg, '(\\s+|:|\\s+)')[[1]][-1]
-    res <- as.numeric(msg[c(FALSE, TRUE)]) # even indices are the values
-    names(res) <- msg[c(TRUE, FALSE)]      # odds are the names
+    mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
+    res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
   } else {
     res <- sapply(seq_along(watchlist), function(j) {
       w <- watchlist[[j]]

diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R
@@ -56,10 +56,10 @@ xgb.dump <- function(model, fname = NULL, fmap = "", with_stats=FALSE,
                       as.character(dump_format))
 
   if (is.null(fname))
-    model_dump <- stri_replace_all_regex(model_dump, '\t', '')
+    model_dump <- gsub('\t', '', model_dump, fixed = TRUE)
 
   if (dump_format == "text")
-    model_dump <- unlist(stri_split_regex(model_dump, '\n'))
+    model_dump <- unlist(strsplit(model_dump, '\n', fixed = TRUE))
 
   model_dump <- grep('^\\s*$', model_dump, invert = TRUE, value = TRUE)
 

diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R
@@ -87,11 +87,11 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
   }
 
   if (length(text) < 2 ||
-      sum(stri_detect_regex(text, 'yes=(\\d+),no=(\\d+)')) < 1) {
+      sum(grepl('yes=(\\d+),no=(\\d+)', text)) < 1) {
     stop("Non-tree model detected! This function can only be used with tree models.")
   }
 
-  position <- which(!is.na(stri_match_first_regex(text, "booster")))
+  position <- which(grepl("booster", text, fixed = TRUE))
 
   add.tree.id <- function(node, tree) if (use_int_id) node else paste(tree, node, sep = "-")
 
@@ -108,20 +108,21 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
   }
   td <- td[Tree %in% trees & !grepl('^booster', t)]
 
-  td[, Node := stri_match_first_regex(t, "(\\d+):")[, 2] %>% as.integer]
+  td[, Node := as.integer(sub("^([0-9]+):.*", "\\1", t))]
   if (!use_int_id) td[, ID := add.tree.id(Node, Tree)]
-  td[, isLeaf := !is.na(stri_match_first_regex(t, "leaf"))]
+  td[, isLeaf := grepl("leaf", t, fixed = TRUE)]
 
   # parse branch lines
   branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
                       "gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
   branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
   td[isLeaf == FALSE,
      (branch_cols) := {
-      # skip some indices with spurious capture groups from anynumber_regex
-      xtr <- stri_match_first_regex(t, branch_rx)[, c(2, 3, 5, 6, 7, 8, 10), drop = FALSE]
-      xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
-      lapply(seq_len(ncol(xtr)), function(i) xtr[, i])
+       matches <- regmatches(t, regexec(branch_rx, t))
+       # skip some indices with spurious capture groups from anynumber_regex
+       xtr <- do.call(rbind, matches)[, c(2, 3, 5, 6, 7, 8, 10), drop = FALSE]
+       xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
+       as.data.table(xtr)
     }]
   # assign feature_names when available
   if (!is.null(feature_names)) {
@@ -135,8 +136,9 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
   leaf_cols <- c("Feature", "Quality", "Cover")
   td[isLeaf == TRUE,
      (leaf_cols) := {
-      xtr <- stri_match_first_regex(t, leaf_rx)[, c(2, 4)]
-      c("Leaf", lapply(seq_len(ncol(xtr)), function(i) xtr[, i]))
+       matches <- regmatches(t, regexec(leaf_rx, t))
+       xtr <- do.call(rbind, matches)[, c(2, 4)]
+       c("Leaf", as.data.table(xtr))
     }]
 
   # convert some columns to numeric

diff --git a/R-package/R/xgb.plot.multi.trees.R b/R-package/R/xgb.plot.multi.trees.R
@@ -67,7 +67,7 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
 
   # first number of the path represents the tree, then the following numbers are related to the path to follow
   # root init
-  root.nodes <- tree.matrix[stri_detect_regex(ID, "\\d+-0"), ID]
+  root.nodes <- tree.matrix[Node == 0, ID]
   tree.matrix[ID %in% root.nodes, abs.node.position := root.nodes]
 
   precedent.nodes <- root.nodes
@@ -86,11 +86,8 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
   tree.matrix[!is.na(Yes), Yes := paste0(abs.node.position, "_0")]
   tree.matrix[!is.na(No), No := paste0(abs.node.position, "_1")]
 
-  remove.tree <- . %>% stri_replace_first_regex(pattern = "^\\d+-", replacement = "")
-
-  tree.matrix[, `:=`(abs.node.position = remove.tree(abs.node.position),
-                     Yes = remove.tree(Yes),
-                     No = remove.tree(No))]
+  for (nm in c("abs.node.position", "Yes", "No"))
+    data.table::set(tree.matrix, j = nm, value = sub("^\\d+-", "", tree.matrix[[nm]]))
 
   nodes.dt <- tree.matrix[
         , .(Quality = sum(Quality))

diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
@@ -91,11 +91,6 @@ NULL
 #' @importFrom data.table setkeyv
 #' @importFrom data.table setnames
 #' @importFrom magrittr %>%
-#' @importFrom stringi stri_detect_regex
-#' @importFrom stringi stri_match_first_regex
-#' @importFrom stringi stri_replace_first_regex
-#' @importFrom stringi stri_replace_all_regex
-#' @importFrom stringi stri_split_regex
 #' @importFrom utils object.size str tail
 #' @importFrom stats predict
 #' @importFrom stats median

diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
@@ -47,7 +47,7 @@ test_that("custom objective with early stop works", {
   bst <- xgb.train(param, dtrain, 10, watchlist)
   expect_equal(class(bst), "xgb.Booster")
   train_log <- bst$evaluation_log$train_error
-  expect_true(all(diff(train_log)) <= 0)
+  expect_true(all(diff(train_log) <= 0))
 })
 
 test_that("custom objective using DMatrix attr works", {

diff --git a/R-package/tests/testthat/test_gc_safety.R b/R-package/tests/testthat/test_gc_safety.R
@@ -9,7 +9,8 @@ test_that("train and prediction when gctorture is on", {
   test <- agaricus.test
   gctorture(TRUE)
   bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-  eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+                 eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
   pred <- predict(bst, test$data)
   gctorture(FALSE)
+  expect_length(pred, length(test$label))
 })
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
@@ -335,8 +335,8 @@ test_that("xgb.model.dt.tree and xgb.importance work with a single split model",
 })
 
 test_that("xgb.plot.tree works with and without feature names", {
-  xgb.plot.tree(feature_names = feature.names, model = bst.Tree)
-  xgb.plot.tree(model = bst.Tree)
+  expect_silent(xgb.plot.tree(feature_names = feature.names, model = bst.Tree))
+  expect_silent(xgb.plot.tree(model = bst.Tree))
 })
 
 test_that("xgb.plot.multi.trees works with and without feature names", {
@@ -390,8 +390,8 @@ test_that("xgb.plot.shap works", {
 })
 
 test_that("xgb.plot.shap.summary works", {
-  xgb.plot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2)
-  xgb.ggplot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2)
+  expect_silent(xgb.plot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2))
+  expect_silent(xgb.ggplot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2))
 })
 
 test_that("check.deprecation works", {