From b1233ef2aedee7ce704f51e0b081591ee06283a3 Mon Sep 17 00:00:00 2001 From: Philip Cho Date: Wed, 5 Sep 2018 12:30:21 -0700 Subject: [PATCH] Backport note about predict() behavior of DART booster --- python-package/xgboost/core.py | 20 ++++++++++++++++---- python-package/xgboost/sklearn.py | 22 ++++++++++++++++++---- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index daf7bea44d06..ae16d6fa0c98 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -996,10 +996,22 @@ def predict(self, data, output_margin=False, ntree_limit=0, pred_leaf=False, """ Predict with data. - NOTE: This function is not thread safe. - For each booster object, predict can only be called from one thread. - If you want to run prediction using multiple thread, call bst.copy() to make copies - of model object and then call predict + .. note:: This function is not thread safe. + + For each booster object, predict can only be called from one thread. + If you want to run prediction using multiple threads, call ``bst.copy()`` to make copies + of the model object and then call ``predict()``. + + .. note:: Using ``predict()`` with DART booster + + If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only + some of the trees will be evaluated. This will produce incorrect results if ``data`` is + not the training data. To obtain correct results on test sets, set ``ntree_limit`` to + a nonzero value, e.g. + + .. 
code-block:: python + + preds = bst.predict(dtest, ntree_limit=num_round) Parameters ---------- diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 173f8e51024d..69784b68299f 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -578,10 +578,24 @@ def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None, def predict(self, data, output_margin=False, ntree_limit=0): """ Predict with `data`. - NOTE: This function is not thread safe. - For each booster object, predict can only be called from one thread. - If you want to run prediction using multiple thread, call xgb.copy() to make copies - of model object and then call predict + + .. note:: This function is not thread safe. + + For each booster object, predict can only be called from one thread. + If you want to run prediction using multiple threads, call ``xgb.copy()`` to make copies + of the model object and then call ``predict()``. + + .. note:: Using ``predict()`` with DART booster + + If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only + some of the trees will be evaluated. This will produce incorrect results if ``data`` is + not the training data. To obtain correct results on test sets, set ``ntree_limit`` to + a nonzero value, e.g. + + .. code-block:: python + + preds = bst.predict(dtest, ntree_limit=num_round) + Parameters ---------- data : DMatrix