Skip to content

Commit

Permalink
Fix prediction with cat data in sklearn interface. (#7306)
Browse files Browse the repository at this point in the history
* Specify DMatrix parameter for pre-processing dataframe.
* Add documentation about the behaviour of prediction.
  • Loading branch information
trivialfis authored Oct 12, 2021
1 parent 89d87e5 commit 5b17bb0
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
20 changes: 16 additions & 4 deletions python-package/xgboost/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,9 @@ def inner(preds: np.ndarray, dmatrix: DMatrix) -> Tuple[np.ndarray, np.ndarray]:
Device ordinal.
validate_parameters : Optional[bool]
Give warnings for unknown parameter.
predictor : Optional[str]
Force XGBoost to use a specific predictor; available choices are [cpu_predictor,
gpu_predictor].
enable_categorical : bool
.. versionadded:: 1.5.0
Expand Down Expand Up @@ -807,7 +809,11 @@ def _can_use_inplace_predict(self) -> bool:
# Inplace predict doesn't handle as many data types as DMatrix, but it's
# sufficient for dask interface where input is simpler.
predictor = self.get_params().get("predictor", None)
if predictor in ("auto", None) and self.booster != "gblinear":
if (
not self.enable_categorical
and predictor in ("auto", None)
and self.booster != "gblinear"
):
return True
return False

Expand All @@ -834,7 +840,9 @@ def predict(
iteration_range: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
"""Predict with `X`. If the model is trained with early stopping, then `best_iteration`
is used automatically.
is used automatically. For tree models, when the data is on GPU (e.g. a cupy array
or cuDF dataframe) and `predictor` is not specified, the prediction is run on GPU
automatically; otherwise it runs on CPU.
.. note:: This function is only thread safe for `gbtree` and `dart`.
Expand Down Expand Up @@ -862,6 +870,7 @@ def predict(
Returns
-------
prediction
"""
iteration_range = _convert_ntree_limit(
self.get_booster(), ntree_limit, iteration_range
Expand All @@ -886,7 +895,10 @@ def predict(
pass

test = DMatrix(
X, base_margin=base_margin, missing=self.missing, nthread=self.n_jobs
X, base_margin=base_margin,
missing=self.missing,
nthread=self.n_jobs,
enable_categorical=self.enable_categorical
)
return self.get_booster().predict(
data=test,
Expand Down
1 change: 1 addition & 0 deletions tests/python-gpu/test_gpu_with_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def test_categorical():
)
X = pd.DataFrame(X.todense()).astype("category")
clf.fit(X, y)
assert not clf._can_use_inplace_predict()

with tempfile.TemporaryDirectory() as tempdir:
model = os.path.join(tempdir, "categorial.json")
Expand Down

0 comments on commit 5b17bb0

Please sign in to comment.