From 2e64333a84d8635838b6d95f5da6e06097e97852 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 12 Dec 2023 20:41:04 +0100 Subject: [PATCH] clarify effect of enable_categorical --- R-package/R/xgb.DMatrix.R | 14 ++++++++++++-- R-package/man/xgb.DMatrix.Rd | 14 ++++++++++++-- python-package/xgboost/core.py | 14 ++++++++++++-- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index 602164afe4b7..fead30413159 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -27,14 +27,24 @@ #' @param label_lower_bound Lower bound for survival training. #' @param label_upper_bound Upper bound for survival training. #' @param feature_weights Set feature weights for column sampling. +#' @param enable_categorical Experimental support of specializing for categorical features. +#' +#' If passing 'TRUE' and 'data' is a data frame, +#' columns of categorical types will automatically +#' be set to be of categorical type (feature_type='c') in the resulting DMatrix. +#' +#' If passing 'FALSE' and 'data' is a data frame with categorical columns, +#' it will result in an error being thrown. +#' +#' If 'data' is not a data frame, this argument is ignored. +#' +#' JSON/UBJSON serialization format is required for this. #' #' @details #' Note that DMatrix objects are not serializable through R functions such as \code{saveRDS} or \code{save}. #' If a DMatrix gets serialized and then de-serialized (for example, when saving data in an R session or caching #' chunks in an Rmd file), the resulting object will not be usable anymore and will need to be reconstructed #' from the original source of data. -#' @param enable_categorical Experimental support of specializing for -#' categorical features. JSON/UBJSON serialization format is required. 
#' #' @examples #' data(agaricus.train, package='xgboost') diff --git a/R-package/man/xgb.DMatrix.Rd b/R-package/man/xgb.DMatrix.Rd index 619f5d730a8d..95cc8d3cd34f 100644 --- a/R-package/man/xgb.DMatrix.Rd +++ b/R-package/man/xgb.DMatrix.Rd @@ -58,8 +58,18 @@ frame and matrix.} \item{feature_weights}{Set feature weights for column sampling.} -\item{enable_categorical}{Experimental support of specializing for -categorical features. JSON/UBJSON serialization format is required.} +\item{enable_categorical}{Experimental support of specializing for categorical features. + + If passing 'TRUE' and 'data' is a data frame, + columns of categorical types will automatically + be set to be of categorical type (feature_type='c') in the resulting DMatrix. + + If passing 'FALSE' and 'data' is a data frame with categorical columns, + it will result in an error being thrown. + + If 'data' is not a data frame, this argument is ignored. + + JSON/UBJSON serialization format is required for this.} } \description{ Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file. diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 3c864a1c89f2..1bddadbbe63d 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -822,8 +822,18 @@ def __init__( .. note:: This parameter is experimental - Experimental support of specializing for categorical features. JSON/UBJSON - serialization format is required. + Experimental support of specializing for categorical features. + + If passing 'True' and 'data' is a data frame (from supported libraries + such as Pandas or Modin), columns of categorical types will automatically + be set to be of categorical type (feature_type='c') in the resulting DMatrix. + + If passing 'False' and 'data' is a data frame with categorical columns, + it will result in an error being thrown. + + If 'data' is not a data frame, this argument is ignored. 
+ + JSON/UBJSON serialization format is required for categorical features. """ if group is not None and qid is not None: