Skip to content

Commit

Permalink
Merge pull request #1 from YuanTingHsieh/add_alternate_vertical_splits
Browse files Browse the repository at this point in the history
Add alternate vertical splits
  • Loading branch information
ZiyueXu77 authored Feb 20, 2024
2 parents e9eef15 + 6fcbe02 commit 967e307
Show file tree
Hide file tree
Showing 50 changed files with 881 additions and 239 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/i386.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: XGBoost-i386-test

on: [push, pull_request]

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
build-32bit:
name: Build 32-bit
runs-on: ubuntu-latest
services:
registry:
image: registry:2
ports:
- 5000:5000
steps:
- uses: actions/checkout@v2.5.0
with:
submodules: 'true'
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: network=host
- name: Build and push container
uses: docker/build-push-action@v5
with:
context: .
file: tests/ci_build/Dockerfile.i386
push: true
tags: localhost:5000/xgboost/build-32bit:latest
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Build XGBoost
run: |
docker run --rm -v $PWD:/workspace -w /workspace \
-e CXXFLAGS='-Wno-error=overloaded-virtual -Wno-error=maybe-uninitialized -Wno-error=redundant-move' \
localhost:5000/xgboost/build-32bit:latest \
tests/ci_build/build_via_cmake.sh
3 changes: 0 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(FATAL_ERROR "Need Clang 9.0 or newer to build XGBoost")
endif()
endif()
if(CMAKE_SIZE_OF_VOID_P EQUAL 4)
message(FATAL_ERROR "XGBoost does not support 32-bit archs. Please use 64-bit arch instead.")
endif()

include(${xgboost_SOURCE_DIR}/cmake/PrefetchIntrinsics.cmake)
find_prefetch_intrinsics()
Expand Down
5 changes: 2 additions & 3 deletions R-package/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ S3method(print,xgb.DMatrix)
S3method(print,xgb.cv.synchronous)
S3method(setinfo,xgb.Booster)
S3method(setinfo,xgb.DMatrix)
S3method(slice,xgb.DMatrix)
S3method(variable.names,xgb.Booster)
export("xgb.attr<-")
export("xgb.attributes<-")
Expand All @@ -30,13 +29,12 @@ export(cb.reset.parameters)
export(cb.save.model)
export(getinfo)
export(setinfo)
export(slice)
export(xgb.DMatrix)
export(xgb.DMatrix.hasinfo)
export(xgb.DMatrix.save)
export(xgb.DataBatch)
export(xgb.DataIter)
export(xgb.ExternalDMatrix)
export(xgb.ProxyDMatrix)
export(xgb.QuantileDMatrix)
export(xgb.QuantileDMatrix.from_iterator)
export(xgb.attr)
Expand Down Expand Up @@ -70,6 +68,7 @@ export(xgb.save)
export(xgb.save.raw)
export(xgb.set.config)
export(xgb.slice.Booster)
export(xgb.slice.DMatrix)
export(xgb.train)
export(xgboost)
import(methods)
Expand Down
100 changes: 99 additions & 1 deletion R-package/R/xgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,21 @@ xgb.get.handle <- function(object) {
#' If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
#' type and shape of predictions are invariant to the model type.
#' @param validate_features When `TRUE`, validate that the Booster's and newdata's feature_names
#' match (only applicable when both `object` and `newdata` have feature names).
#'
#' If the column names differ and `newdata` is not an `xgb.DMatrix`, will try to reorder
#' the columns in `newdata` to match with the booster's.
#'
#' If the booster has feature types and `newdata` is either an `xgb.DMatrix` or `data.frame`,
#' will additionally verify that categorical columns are of the correct type in `newdata`,
#' throwing an error if they do not match.
#'
#' If passing `FALSE`, it is assumed that the feature names and types are the same,
#' and come in the same order as in the training data.
#'
#' Note that this check might add some sizable latency to the predictions, so it's
#' recommended to disable it for performance-sensitive applications.
#' @param ... Not used.
#'
#' @details
Expand Down Expand Up @@ -271,7 +286,11 @@ xgb.get.handle <- function(object) {
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE,
validate_features = FALSE, ...) {
if (validate_features) {
newdata <- validate.features(object, newdata)
}
if (!inherits(newdata, "xgb.DMatrix")) {
nthread <- xgb.nthread(object)
newdata <- xgb.DMatrix(
Expand Down Expand Up @@ -418,6 +437,85 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
return(arr)
}

validate.features <- function(bst, newdata) {
if (is.character(newdata)) {
# this will be encountered when passing file paths
return(newdata)
}
if (inherits(newdata, "sparseVector")) {
# in this case, newdata won't have metadata
return(newdata)
}
if (is.vector(newdata)) {
newdata <- as.matrix(newdata)
}

booster_names <- getinfo(bst, "feature_name")
checked_names <- FALSE
if (NROW(booster_names)) {

try_reorder <- FALSE
if (inherits(newdata, "xgb.DMatrix")) {
curr_names <- getinfo(newdata, "feature_name")
} else {
curr_names <- colnames(newdata)
try_reorder <- TRUE
}

if (NROW(curr_names)) {
checked_names <- TRUE

if (length(curr_names) != length(booster_names) || any(curr_names != booster_names)) {

if (!try_reorder) {
stop("Feature names in 'newdata' do not match with booster's.")
} else {
if (inherits(newdata, "data.table")) {
newdata <- newdata[, booster_names, with = FALSE]
} else {
newdata <- newdata[, booster_names, drop = FALSE]
}
}

}

} # if (NROW(curr_names)) {

} # if (NROW(booster_names)) {

if (inherits(newdata, c("data.frame", "xgb.DMatrix"))) {

booster_types <- getinfo(bst, "feature_type")
if (!NROW(booster_types)) {
# Note: types in the booster are optional. Other interfaces
# might not even save it as booster attributes for example,
# even if the model uses categorical features.
return(newdata)
}
if (inherits(newdata, "xgb.DMatrix")) {
curr_types <- getinfo(newdata, "feature_type")
if (length(curr_types) != length(booster_types) || any(curr_types != booster_types)) {
stop("Feature types in 'newdata' do not match with booster's.")
}
}
if (inherits(newdata, "data.frame")) {
is_factor <- sapply(newdata, is.factor)
if (any(is_factor != (booster_types == "c"))) {
stop(
paste0(
"Feature types in 'newdata' do not match with booster's for same columns (by ",
ifelse(checked_names, "name", "position"),
")."
)
)
}
}

}

return(newdata)
}


#' @title Accessors for serializable attributes of a model
#'
Expand Down
Loading

0 comments on commit 967e307

Please sign in to comment.