diff --git a/Makefile b/Makefile index 63319ba3ef63..d9232b4d83c1 100644 --- a/Makefile +++ b/Makefile @@ -138,12 +138,14 @@ Rpack: clean_all rm xgboost/remove_warning_suppression_pragma.sh rm -rfv xgboost/tests/helper_scripts/ +R ?= R + Rbuild: Rpack - R CMD build --no-build-vignettes xgboost + $(R) CMD build --no-build-vignettes xgboost rm -rf xgboost Rcheck: Rbuild - R CMD check --as-cran xgboost*.tar.gz + $(R) CMD check --as-cran xgboost*.tar.gz -include build/*.d -include build/*/*.d diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index f599a57858be..143ec7a12670 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -63,6 +63,5 @@ Imports: methods, data.table (>= 1.9.6), magrittr (>= 1.5), - stringi (>= 0.5.2) RoxygenNote: 7.1.1 SystemRequirements: GNU make, C++14 diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 1795742c8160..f17ee76ab449 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -81,11 +81,6 @@ importFrom(graphics,title) importFrom(magrittr,"%>%") importFrom(stats,median) importFrom(stats,predict) -importFrom(stringi,stri_detect_regex) -importFrom(stringi,stri_match_first_regex) -importFrom(stringi,stri_replace_all_regex) -importFrom(stringi,stri_replace_first_regex) -importFrom(stringi,stri_split_regex) importFrom(utils,head) importFrom(utils,object.size) importFrom(utils,str) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 846cc1f4404e..a903de36bb17 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -167,9 +167,8 @@ xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) { evnames <- names(watchlist) if (is.null(feval)) { msg <- .Call(XGBoosterEvalOneIter_R, booster_handle, as.integer(iter), watchlist, as.list(evnames)) - msg <- stri_split_regex(msg, '(\\s+|:|\\s+)')[[1]][-1] - res <- as.numeric(msg[c(FALSE, TRUE)]) # even indices are the values - names(res) <- msg[c(TRUE, FALSE)] # odds are the names + mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2) + res <- structure(as.numeric(mat[2, ]), names = mat[1, ]) } else { res <- sapply(seq_along(watchlist), function(j) { w <- watchlist[[j]] diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index 0e8ae4f3edeb..d79799b3cb19 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -56,10 +56,10 @@ xgb.dump <- function(model, fname = NULL, fmap = "", with_stats=FALSE, as.character(dump_format)) if (is.null(fname)) - model_dump <- stri_replace_all_regex(model_dump, '\t', '') + model_dump <- gsub('\t', '', model_dump, fixed = TRUE) if (dump_format == "text") - model_dump <- unlist(stri_split_regex(model_dump, '\n')) + model_dump <- unlist(strsplit(model_dump, '\n', fixed = TRUE)) model_dump <- grep('^\\s*$', model_dump, invert = TRUE, value = TRUE) diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R index af872c3cbeb4..df0ce54dc310 100644 --- a/R-package/R/xgb.model.dt.tree.R +++ b/R-package/R/xgb.model.dt.tree.R @@ -87,11 +87,11 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL, } if (length(text) < 2 || - sum(stri_detect_regex(text, 'yes=(\\d+),no=(\\d+)')) < 1) { + sum(grepl('yes=(\\d+),no=(\\d+)', text)) < 1) { stop("Non-tree model detected! This function can only be used with tree models.") } - position <- which(!is.na(stri_match_first_regex(text, "booster"))) + position <- which(grepl("booster", text, fixed = TRUE)) add.tree.id <- function(node, tree) if (use_int_id) node else paste(tree, node, sep = "-") @@ -108,9 +108,9 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL, } td <- td[Tree %in% trees & !grepl('^booster', t)] - td[, Node := stri_match_first_regex(t, "(\\d+):")[, 2] %>% as.integer] + td[, Node := as.integer(sub("^([0-9]+):.*", "\\1", t))] if (!use_int_id) td[, ID := add.tree.id(Node, Tree)] - td[, isLeaf := !is.na(stri_match_first_regex(t, "leaf"))] + td[, isLeaf := grepl("leaf", t, fixed = TRUE)] # parse branch lines branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),", @@ -118,10 +118,11 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL, branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover") td[isLeaf == FALSE, (branch_cols) := { - # skip some indices with spurious capture groups from anynumber_regex - xtr <- stri_match_first_regex(t, branch_rx)[, c(2, 3, 5, 6, 7, 8, 10), drop = FALSE] - xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree) - lapply(seq_len(ncol(xtr)), function(i) xtr[, i]) + matches <- regmatches(t, regexec(branch_rx, t)) + # skip some indices with spurious capture groups from anynumber_regex + xtr <- do.call(rbind, matches)[, c(2, 3, 5, 6, 7, 8, 10), drop = FALSE] + xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree) + as.data.table(xtr) }] # assign feature_names when available if (!is.null(feature_names)) { @@ -135,8 +136,9 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL, leaf_cols <- c("Feature", "Quality", "Cover") td[isLeaf == TRUE, (leaf_cols) := { - xtr <- stri_match_first_regex(t, leaf_rx)[, c(2, 4)] - c("Leaf", lapply(seq_len(ncol(xtr)), function(i) xtr[, i])) + matches <- regmatches(t, regexec(leaf_rx, t)) + xtr <- do.call(rbind, matches)[, c(2, 4)] + c("Leaf", as.data.table(xtr)) }] # convert some columns to numeric diff --git a/R-package/R/xgb.plot.multi.trees.R b/R-package/R/xgb.plot.multi.trees.R index fc02baee1352..c884281fcfaa 100644 --- a/R-package/R/xgb.plot.multi.trees.R +++ b/R-package/R/xgb.plot.multi.trees.R @@ -67,7 +67,7 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, # first number of the path represents the tree, then the following numbers are related to the path to follow # root init - root.nodes <- tree.matrix[stri_detect_regex(ID, "\\d+-0"), ID] + root.nodes <- tree.matrix[Node == 0, ID] tree.matrix[ID %in% root.nodes, abs.node.position := root.nodes] precedent.nodes <- root.nodes @@ -86,11 +86,8 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, tree.matrix[!is.na(Yes), Yes := paste0(abs.node.position, "_0")] tree.matrix[!is.na(No), No := paste0(abs.node.position, "_1")] - remove.tree <- . %>% stri_replace_first_regex(pattern = "^\\d+-", replacement = "") - - tree.matrix[, `:=`(abs.node.position = remove.tree(abs.node.position), - Yes = remove.tree(Yes), - No = remove.tree(No))] + for (nm in c("abs.node.position", "Yes", "No")) + data.table::set(tree.matrix, j = nm, value = sub("^\\d+-", "", tree.matrix[[nm]])) nodes.dt <- tree.matrix[ , .(Quality = sum(Quality)) diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index 2fddfa403bb6..71181c1f1e00 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -91,11 +91,6 @@ NULL #' @importFrom data.table setkeyv #' @importFrom data.table setnames #' @importFrom magrittr %>% -#' @importFrom stringi stri_detect_regex -#' @importFrom stringi stri_match_first_regex -#' @importFrom stringi stri_replace_first_regex -#' @importFrom stringi stri_replace_all_regex -#' @importFrom stringi stri_split_regex #' @importFrom utils object.size str tail #' @importFrom stats predict #' @importFrom stats median diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R index c96dee4e9129..d98e7045a723 100644 --- a/R-package/tests/testthat/test_custom_objective.R +++ b/R-package/tests/testthat/test_custom_objective.R @@ -47,7 +47,7 @@ test_that("custom objective with early stop works", { bst <- xgb.train(param, dtrain, 10, watchlist) expect_equal(class(bst), "xgb.Booster") train_log <- bst$evaluation_log$train_error - expect_true(all(diff(train_log)) <= 0) + expect_true(all(diff(train_log) <= 0)) }) test_that("custom objective using DMatrix attr works", { diff --git a/R-package/tests/testthat/test_gc_safety.R b/R-package/tests/testthat/test_gc_safety.R index 210674e19989..fb80757c5258 100644 --- a/R-package/tests/testthat/test_gc_safety.R +++ b/R-package/tests/testthat/test_gc_safety.R @@ -9,7 +9,8 @@ test_that("train and prediction when gctorture is on", { test <- agaricus.test gctorture(TRUE) bst <- xgboost(data = train$data, label = train$label, max.depth = 2, - eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") pred <- predict(bst, test$data) gctorture(FALSE) + expect_length(pred, length(test$label)) }) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 86c0efd0207e..b0a85a9fe10a 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -335,8 +335,8 @@ test_that("xgb.model.dt.tree and xgb.importance work with a single split model", }) test_that("xgb.plot.tree works with and without feature names", { - xgb.plot.tree(feature_names = feature.names, model = bst.Tree) - xgb.plot.tree(model = bst.Tree) + expect_silent(xgb.plot.tree(feature_names = feature.names, model = bst.Tree)) + expect_silent(xgb.plot.tree(model = bst.Tree)) }) test_that("xgb.plot.multi.trees works with and without feature names", { @@ -390,8 +390,8 @@ test_that("xgb.plot.shap works", { }) test_that("xgb.plot.shap.summary works", { - xgb.plot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2) - xgb.ggplot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2) + expect_silent(xgb.plot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2)) + expect_silent(xgb.ggplot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2)) }) test_that("check.deprecation works", {