dmlc · trivialfis · Feb 13, 2020 · Feb 13, 2020 · Feb 13, 2020
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -49,7 +49,7 @@ option(USE_SANITIZER "Use santizer flags" OFF)
 option(SANITIZER_PATH "Path to sanitizes.")
 set(ENABLED_SANITIZERS "address" "leak" CACHE STRING
   "Semicolon separated list of sanitizer names. E.g 'address;leak'. Supported sanitizers are
-address, leak and thread.")
+address, leak, undefined and thread.")
 ## Plugins
 option(PLUGIN_LZ4 "Build lz4 plugin" OFF)
 option(PLUGIN_DENSE_PARSER "Build dense parser plugin" OFF)

diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
@@ -139,6 +139,8 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
 #'        prediction outputs per case. This option has no effect when either of predleaf, predcontrib,
 #'        or predinteraction flags is TRUE.
+#' @param training whether the prediction result is used for training.  For dart booster,
+#'        predicting in training mode will perform dropout.
 #' @param ... Parameters passed to \code{predict.xgb.Booster}
 #'
 #' @details

diff --git a/R-package/man/agaricus.test.Rd b/R-package/man/agaricus.test.Rd
diff --git a/R-package/man/agaricus.train.Rd b/R-package/man/agaricus.train.Rd
diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd
diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
@@ -31,7 +31,6 @@ num_round <- 2
 test_that("custom objective works", {
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
-  expect_equal(length(bst$raw), 1100)
   expect_false(is.null(bst$evaluation_log))
   expect_false(is.null(bst$evaluation_log$eval_error))
   expect_lt(bst$evaluation_log[num_round, eval_error], 0.03)
@@ -58,5 +57,4 @@ test_that("custom objective using DMatrix attr works", {
   param$objective = logregobjattr
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
-  expect_equal(length(bst$raw), 1100)
 })
diff --git a/doc/python/convert_090to100.py b/doc/python/convert_090to100.py
@@ -0,0 +1,79 @@
+'''This is a simple script that converts a pickled XGBoost
+Scikit-Learn interface object from 0.90 to a native model.  The pickle
+format is not stable as it's a direct serialization of a Python object.
+We advise against using it when stability is needed.
+
+'''
+import pickle
+import json
+import os
+import argparse
+import numpy as np
+import xgboost
+import warnings
+
+
+def save_label_encoder(le):
+    '''Return the attributes of the XGBClassifier label encoder as a dict.
+
+    Every entry of ``le.__dict__`` is kept under its own name; numpy arrays
+    are replaced by the result of their ``tolist()``.
+    '''
+    def to_plain(value):
+        return value.tolist() if isinstance(value, np.ndarray) else value
+    return {name: to_plain(value) for name, value in le.__dict__.items()}
+
+
+def xgboost_skl_90to100(skl_model):
+    '''Convert the pickled Scikit-Learn model at path ``skl_model`` to a native
+    model file saved beside it.  Raises TypeError for non-XGBModel pickles.'''
+    # NOTE: unpickling can execute arbitrary code; only load trusted files.
+    with open(skl_model, 'rb') as fd:
+        old = pickle.load(fd)
+    if not isinstance(old, xgboost.XGBModel):
+        raise TypeError(
+            'The script only handles Scikit-Learn interface object')
+
+    # Save Scikit-Learn specific Python attributes into a JSON document.
+    model = {}
+    for k, v in old.__dict__.items():
+        if k == '_le':
+            model[k] = save_label_encoder(v)
+        elif k == 'classes_':
+            model[k] = v.tolist()
+        elif k == '_Booster':
+            continue  # The booster itself is written by save_model below.
+        else:
+            try:
+                json.dumps({k: v})
+                model[k] = v
+            except TypeError:
+                warnings.warn(str(k) + ' is not saved in Scikit-Learn meta.')
+    booster = old.get_booster()
+    # Store the JSON serialization as an attribute
+    booster.set_attr(scikit_learn=json.dumps(model))
+
+    # Prefix only the file name so that inputs given with a directory work.
+    directory, name = os.path.split(skl_model)
+    stem = os.path.join(directory, 'xgboost_native_model_from_' + name)
+    i = 0
+    while os.path.exists(stem + '-' + str(i) + '.bin'):
+        i += 1
+    booster.save_model(stem + '-' + str(i) + '.bin')
+
+
+if __name__ == '__main__':
+    # Validate with an explicit check: `assert` is skipped under `python -O`.
+    if xgboost.__version__ == '1.0.0':
+        raise RuntimeError('Please use the XGBoost version'
+                           ' that generates this pickle.')
+    parser = argparse.ArgumentParser(
+        description=('A simple script to convert pickle generated by'
+                     ' XGBoost 0.90 to XGBoost 1.0.0 model (not pickle).'))
+    parser.add_argument(
+        '--old-pickle', type=str, required=True,
+        help='Path to old pickle file of Scikit-Learn interface object.  '
+        'Will output a native model converted from this pickle file')
+    args = parser.parse_args()
+
+    xgboost_skl_90to100(args.old_pickle)
diff --git a/doc/tutorials/saving_model.rst b/doc/tutorials/saving_model.rst
@@ -91,7 +91,12 @@ Loading pickled file from different version of XGBoost
 
 As noted, pickled model is neither portable nor stable, but in some cases the pickled
 models are valuable.  One way to restore it in the future is to load it back with that
-specific version of Python and XGBoost, export the model by calling `save_model`.
+specific version of Python and XGBoost, export the model by calling `save_model`.  To help
+ease the migration, we created a simple script for converting a pickled XGBoost 0.90
+Scikit-Learn interface object to an XGBoost 1.0.0 native model.  Please note that the script
+suits simple use cases, and it's advised not to use pickle when stability is needed.
+It's located in ``xgboost/doc/python`` with the name ``convert_090to100.py``.  See
+comments in the script for more details.
 
 ********************************************************
 Saving and Loading the internal parameters configuration

diff --git a/include/xgboost/learner.h b/include/xgboost/learner.h
@@ -208,6 +208,8 @@ struct LearnerModelParam {
   // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
   // this one as an immutable copy.
   LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin);
+  /*! \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
+  bool Initialized() const { return num_feature != 0; }
 };
 
 }  // namespace xgboost

diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py
@@ -600,6 +600,7 @@ def fit(self,
         results = train(self.client, params, dtrain,
                         num_boost_round=self.get_num_boosting_rounds(),
                         evals=evals)
+        # pylint: disable=attribute-defined-outside-init
         self._Booster = results['booster']
         # pylint: disable=attribute-defined-outside-init
         self.evals_result_ = results['history']

diff --git a/src/common/json.cc b/src/common/json.cc
@@ -24,7 +24,7 @@ void JsonWriter::Visit(JsonArray const* arr) {
   for (size_t i = 0; i < size; ++i) {
     auto const& value = vec[i];
     this->Save(value);
-    if (i != size-1) { Write(", "); }
+    if (i != size-1) { Write(","); }
   }
   this->Write("]");
 }
@@ -38,7 +38,7 @@ void JsonWriter::Visit(JsonObject const* obj) {
   size_t size = obj->getObject().size();
 
   for (auto& value : obj->getObject()) {
-    this->Write("\"" + value.first + "\": ");
+    this->Write("\"" + value.first + "\":");
     this->Save(value.second);
 
     if (i != size-1) {