[Breaking] Let's make histogram method the default #7049

Closed
wants to merge 15 commits
24 changes: 22 additions & 2 deletions Jenkinsfile-win64
@@ -40,7 +40,8 @@ pipeline {
steps {
script {
parallel ([
'build-win64-cuda10.1': { BuildWin64() }
'build-win64-cuda10.1': { BuildWin64() },
'build-rpkg-win64-cuda10.1': { BuildRPackageWithCUDAWin64() }
])
}
}
@@ -75,6 +76,7 @@ def checkoutSrcs() {

def BuildWin64() {
node('win64 && cuda10_unified') {
deleteDir()
unstash name: 'srcs'
echo "Building XGBoost for Windows AMD64 target..."
bat "nvcc --version"
@@ -115,8 +117,26 @@ def BuildWin64() {
}
}

def BuildRPackageWithCUDAWin64() {
node('win64 && cuda10_unified') {
deleteDir()
unstash name: 'srcs'
bat "nvcc --version"
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
bat """
bash tests/ci_build/build_r_pkg_with_cuda_win64.sh ${commit_id}
"""
echo 'Uploading R tarball...'
path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
s3Upload bucket: 'xgboost-nightly-builds', path: path, acl: 'PublicRead', includePathPattern:'xgboost_r_gpu_win64_*.tar.gz'
}
deleteDir()
}
}
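For reference, the branch-to-S3-prefix rule used above (master uploads to the bucket root, other branches to a subdirectory) can be stated compactly. A Python rendering of the same ternary, with illustrative branch names:

    # Python rendering of: path = ("${BRANCH_NAME}" == 'master') ? '' : "${BRANCH_NAME}/"
    def s3_prefix(branch: str) -> str:
        return "" if branch == "master" else f"{branch}/"

    assert s3_prefix("master") == ""
    assert s3_prefix("release_1.4.0") == "release_1.4.0/"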

def TestWin64() {
node('win64 && cuda10_unified') {
deleteDir()
unstash name: 'srcs'
unstash name: 'xgboost_whl'
unstash name: 'xgboost_cli'
@@ -127,7 +147,7 @@
bat "build\\testxgboost.exe"
echo "Installing Python dependencies..."
def env_name = 'win64_' + UUID.randomUUID().toString().replaceAll('-', '')
bat "conda env create -n ${env_name} --file=tests/ci_build/conda_env/win64_test.yml"
bat "conda activate && mamba env create -n ${env_name} --file=tests/ci_build/conda_env/win64_test.yml"
echo "Installing Python wheel..."
bat """
conda activate ${env_name} && for /R %%i in (python-package\\dist\\*.whl) DO python -m pip install "%%i"
4 changes: 3 additions & 1 deletion cmake/Utils.cmake
@@ -90,7 +90,9 @@ function(format_gencode_flags flags out)
endif()
# Set up architecture flags
if(NOT flags)
if (CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
if (CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
set(flags "50;52;60;61;70;75;80;86")
elseif (CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
set(flags "35;50;52;60;61;70;75;80")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
set(flags "35;50;52;60;61;70;75")
13 changes: 9 additions & 4 deletions doc/install.rst
@@ -61,9 +61,12 @@ R
and then run ``install.packages("xgboost")``. Without OpenMP, XGBoost will only use a
single CPU core, leading to suboptimal training speed.

* We also provide **experimental** pre-built binary on Linux x86_64 with GPU support.
* We also provide an **experimental** pre-built binary with GPU support. With this binary,
you will be able to use the GPU algorithm without building XGBoost from source.
Download the binary package from the Releases page. The file name will be of the form
``xgboost_r_gpu_linux_[version].tar.gz``. Then install XGBoost by running:
``xgboost_r_gpu_[os]_[version].tar.gz``, where ``[os]`` is either ``linux`` or ``win64``.
(We build the binaries for 64-bit Linux and Windows.)
Then install XGBoost by running:

.. code-block:: bash

@@ -142,9 +145,11 @@ R
-

Other than standard CRAN installation, we also provide *experimental* pre-built binary on
Linux x86_64 with GPU support. You can go to `this page
with GPU support. You can go to `this page
<https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/list.html>`_, find the commit
ID you want to install: ``xgboost_r_gpu_linux_[commit].tar.gz``, download it then run:
ID you want to install and then locate the file ``xgboost_r_gpu_[os]_[commit].tar.gz``,
where ``[os]`` is either ``linux`` or ``win64``. (We build the binaries for 64-bit Linux
and Windows.) Download it and run the following commands:

.. code-block:: bash

16 changes: 2 additions & 14 deletions doc/parameter.rst
@@ -141,14 +141,12 @@ Parameters for Tree Booster
- ``auto``: Use heuristic to choose the fastest method.

- For small datasets, exact greedy (``exact``) will be used.
- For larger datasets, the approximate algorithm (``approx``) will be chosen. It's
recommended to try ``hist`` and ``gpu_hist`` for higher performance with large
datasets.
- For larger datasets, the histogram-optimized approximate algorithm (``hist``) will be
chosen. (``gpu_hist`` has support for ``external memory``.)

- Because the old behavior always used exact greedy on a single machine, the user will
get a message when the approximate algorithm is chosen to notify them of this choice.
- ``exact``: Exact greedy algorithm. Enumerates all split candidates.
- ``approx``: Approximate greedy algorithm using quantile sketch and gradient histogram.
- ``hist``: Faster histogram optimized approximate greedy algorithm.
- ``gpu_hist``: GPU implementation of ``hist`` algorithm.
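In practice, relying on ``auto`` now selects the histogram method for larger datasets, so pinning the method explicitly is the way to keep results reproducible across versions. A minimal sketch with the Python package (synthetic data, illustrative only):

    import numpy as np
    import xgboost as xgb

    X = np.random.rand(1000, 10)
    y = np.random.rand(1000)

    # Request the histogram method explicitly instead of relying on "auto".
    params = {"tree_method": "hist", "objective": "reg:squarederror"}
    booster = xgb.train(params, xgb.DMatrix(X, label=y), num_boost_round=10)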
@@ -245,16 +243,6 @@ Additional parameters for ``hist`` and ``gpu_hist`` tree method

- Use single precision to build histograms instead of double precision.

Additional parameters for ``gpu_hist`` tree method
==================================================

* ``deterministic_histogram``, [default=``true``]

- Build histogram on GPU deterministically. Histogram building is not deterministic due
to the non-associative aspect of floating point summation. We employ a pre-rounding
routine to mitigate the issue, which may lead to slightly lower accuracy. Set to
``false`` to disable it.

Additional parameters for Dart Booster (``booster=dart``)
=========================================================

5 changes: 4 additions & 1 deletion include/xgboost/base.h
@@ -255,9 +255,12 @@ class GradientPairInternal {

/*! \brief gradient statistics pair usually needed in gradient boosting */
using GradientPair = detail::GradientPairInternal<float>;

/*! \brief High precision gradient statistics pair */
using GradientPairPrecise = detail::GradientPairInternal<double>;
/*! \brief Fixed point representation for gradient pair. */
using GradientPairInt32 = detail::GradientPairInternal<int>;
/*! \brief Fixed point representation for high precision gradient pair. */
using GradientPairInt64 = detail::GradientPairInternal<int64_t>;

using Args = std::vector<std::pair<std::string, std::string> >;

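The new ``GradientPairInt64`` alias points at fixed-point gradient accumulation, which avoids the ordering problem above because integer addition is associative. A sketch of the idea in Python (the scale factor and encoding here are hypothetical, not XGBoost's actual representation):

    # Hypothetical fixed-point encoding: scale, round to int, then sum.
    SCALE = 1 << 16

    def to_fixed(x: float) -> int:
        return round(x * SCALE)

    grads = [0.125, -0.5, 0.3, 0.0625]
    # Integer sums are identical under any accumulation order.
    total = sum(to_fixed(g) for g in grads)
    print(total / SCALE)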
@@ -26,8 +26,7 @@ class ExternalCheckpointManagerSuite extends FunSuite with TmpFolderPerSuite wit

private def produceParamMap(checkpointPath: String, checkpointInterval: Int):
Map[String, Any] = {
Map("eta" -> "1", "max_depth" -> "2", "silent" -> "1",
"objective" -> "binary:logistic", "num_workers" -> sc.defaultParallelism,
Map("eta" -> "1", "max_depth" -> "2", "silent" -> "1", "objective" -> "binary:logistic",
"checkpoint_path" -> checkpointPath, "checkpoint_interval" -> checkpointInterval)
}

@@ -86,7 +85,10 @@ class ExternalCheckpointManagerSuite extends FunSuite with TmpFolderPerSuite wit
}


private def trainingWithCheckpoint(cacheData: Boolean, skipCleanCheckpoint: Boolean): Unit = {
private def trainingWithCheckpoint(
cacheData: Boolean,
skipCleanCheckpoint: Boolean,
useSingleUpdater: Boolean): Unit = {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
@@ -98,8 +100,11 @@ class ExternalCheckpointManagerSuite extends FunSuite with TmpFolderPerSuite wit
val cacheDataMap = if (cacheData) Map("cacheTrainingSet" -> true) else Map()
val skipCleanCheckpointMap =
if (skipCleanCheckpoint) Map("skip_clean_checkpoint" -> true) else Map()
val useSingleUpdaterMap =
if (useSingleUpdater) Map("tree_method" -> "auto", "num_workers" -> 1)
else Map("tree_method" -> "approx", "num_workers" -> sc.defaultParallelism)

val finalParamMap = paramMap ++ cacheDataMap ++ skipCleanCheckpointMap
val finalParamMap = paramMap ++ cacheDataMap ++ skipCleanCheckpointMap ++ useSingleUpdaterMap

val prevModel = new XGBoostClassifier(finalParamMap ++ Seq("num_round" -> 5)).fit(training)

@@ -122,14 +127,19 @@ class ExternalCheckpointManagerSuite extends FunSuite with TmpFolderPerSuite wit
}

test("training with checkpoint boosters") {
trainingWithCheckpoint(cacheData = false, skipCleanCheckpoint = true)
trainingWithCheckpoint(cacheData = false, skipCleanCheckpoint = true, useSingleUpdater = false)
}

test("training with checkpoint boosters using tree method heuristic") {
// Hist method output is different when num_workers > 1
trainingWithCheckpoint(cacheData = false, skipCleanCheckpoint = true, useSingleUpdater = true)
}

test("training with checkpoint boosters with cached training dataset") {
trainingWithCheckpoint(cacheData = true, skipCleanCheckpoint = true)
trainingWithCheckpoint(cacheData = true, skipCleanCheckpoint = true, useSingleUpdater = false)
}

test("the checkpoint file should be cleaned after a successful training") {
trainingWithCheckpoint(cacheData = false, skipCleanCheckpoint = false)
trainingWithCheckpoint(cacheData = false, skipCleanCheckpoint = false, useSingleUpdater = false)
}
}
@@ -208,36 +208,56 @@ class XGBoostCpuClassifierSuite extends XGBoostClassifierSuiteBase {
val testDM = new DMatrix(Classification.test.iterator)
val trainingDF = buildDataFrame(Classification.train)
val testDF = buildDataFrame(Classification.test)
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF)
// Hist method output is different when num_workers > 1
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, "hist", 1)
}

test("XGBoost-Spark XGBoostClassifier output should match XGBoost4j (distributed)") {
val trainingDM = new DMatrix(Classification.train.iterator)
val testDM = new DMatrix(Classification.test.iterator)
val trainingDF = buildDataFrame(Classification.train)
val testDF = buildDataFrame(Classification.test)
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, "approx", numWorkers)
}

test("XGBoostClassifier should make correct predictions after upstream random sort") {
val trainingDM = new DMatrix(Classification.train.iterator)
val testDM = new DMatrix(Classification.test.iterator)
val trainingDF = buildDataFrameWithRandSort(Classification.train)
val testDF = buildDataFrameWithRandSort(Classification.test)
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF)
// Hist method output is different when num_workers > 1
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, "hist", 1)
}

test("XGBoostClassifier should make correct predictions after upstream random sort (dist)") {
val trainingDM = new DMatrix(Classification.train.iterator)
val testDM = new DMatrix(Classification.test.iterator)
val trainingDF = buildDataFrameWithRandSort(Classification.train)
val testDF = buildDataFrameWithRandSort(Classification.test)
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, "approx", numWorkers)
}

private def checkResultsWithXGBoost4j(
trainingDM: DMatrix,
testDM: DMatrix,
trainingDF: DataFrame,
testDF: DataFrame,
explicitTreeMethod: String,
explicitNumWorkers: Int,
round: Int = 5): Unit = {
val paramMap = Map(
"eta" -> "1",
"max_depth" -> "6",
"silent" -> "1",
"objective" -> "binary:logistic",
"tree_method" -> treeMethod,
"tree_method" -> explicitTreeMethod,
"max_bin" -> 16)

val model1 = ScalaXGBoost.train(trainingDM, paramMap, round)
val prediction1 = model1.predict(testDM)

val model2 = new XGBoostClassifier(paramMap ++ Array("num_round" -> round,
"num_workers" -> numWorkers)).fit(trainingDF)
"num_workers" -> explicitNumWorkers)).fit(trainingDF)

val prediction2 = model2.transform(testDF).
collect().map(row => (row.getAs[Int]("id"), row.getAs[DenseVector]("probability"))).toMap
@@ -296,8 +316,9 @@ class XGBoostCpuClassifierSuite extends XGBoostClassifierSuiteBase {
}

test("infrequent features (use_external_memory)") {
// Tree method must be explicitly set. Otherwise, the heuristic will be incorrect
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "binary:logistic",
"objective" -> "binary:logistic", "tree_method" -> "approx",
"num_round" -> 5, "num_workers" -> 2, "use_external_memory" -> true, "missing" -> 0)
import DataUtils._
val sparkSession = SparkSession.builder().getOrCreate()
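These suites pin tree_method and num_workers so that Spark output can be checked against single-machine XGBoost4J. The same invariant can be sketched with the Python package (a sketch under assumed tolerances, not part of this PR):

    import numpy as np
    import xgboost as xgb
    from sklearn.datasets import make_classification

    X, y = make_classification(n_samples=1000, random_state=0)
    params = {"eta": 1.0, "max_depth": 6, "objective": "binary:logistic",
              "tree_method": "hist", "max_bin": 16}

    # Native training, analogous to ScalaXGBoost.train(trainingDM, ...).
    booster = xgb.train(params, xgb.DMatrix(X, label=y), num_boost_round=5)
    pred_native = booster.predict(xgb.DMatrix(X))

    # Wrapper training, analogous to new XGBoostClassifier(...).fit(...).
    clf = xgb.XGBClassifier(n_estimators=5, learning_rate=1.0, max_depth=6,
                            tree_method="hist", max_bin=16)
    clf.fit(X, y)
    pred_wrapper = clf.predict_proba(X)[:, 1]

    # With identical parameters in one process, predictions should agree.
    assert np.allclose(pred_native, pred_wrapper, atol=1e-6)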
@@ -44,7 +44,8 @@ class XGBoostGeneralSuite extends FunSuite with TmpFolderPerSuite with PerTest {
val eval = new EvalError()
val training = buildDataFrame(Classification.train)
val testDM = new DMatrix(Classification.test.iterator)
val paramMap = Map("eta" -> "1", "max_depth" -> "6",
// Tree method must be explicitly set. Otherwise, the heuristic will be incorrect
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "tree_method" -> "approx",
"objective" -> "binary:logistic", "num_round" -> 5, "num_workers" -> numWorkers,
"use_external_memory" -> true)
val model = new XGBoostClassifier(paramMap).fit(training)
@@ -32,36 +32,56 @@ abstract class XGBoostRegressorSuiteBase extends FunSuite with PerTest {
val testDM = new DMatrix(Regression.test.iterator)
val trainingDF = buildDataFrame(Regression.train)
val testDF = buildDataFrame(Regression.test)
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF)
// Hist method output is different when num_workers > 1
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, "hist", 1)
}

test("XGBoost-Spark XGBoostRegressor output should match XGBoost4j (distributed)") {
val trainingDM = new DMatrix(Regression.train.iterator)
val testDM = new DMatrix(Regression.test.iterator)
val trainingDF = buildDataFrame(Regression.train)
val testDF = buildDataFrame(Regression.test)
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, "approx", numWorkers)
}

test("XGBoostRegressor should make correct predictions after upstream random sort") {
val trainingDM = new DMatrix(Regression.train.iterator)
val testDM = new DMatrix(Regression.test.iterator)
val trainingDF = buildDataFrameWithRandSort(Regression.train)
val testDF = buildDataFrameWithRandSort(Regression.test)
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF)
// Hist method output is different when num_workers > 1
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, "hist", 1)
}

test("XGBoostRegressor should make correct predictions after upstream random sort (dist)") {
val trainingDM = new DMatrix(Regression.train.iterator)
val testDM = new DMatrix(Regression.test.iterator)
val trainingDF = buildDataFrameWithRandSort(Regression.train)
val testDF = buildDataFrameWithRandSort(Regression.test)
checkResultsWithXGBoost4j(trainingDM, testDM, trainingDF, testDF, "approx", numWorkers)
}

private def checkResultsWithXGBoost4j(
trainingDM: DMatrix,
testDM: DMatrix,
trainingDF: DataFrame,
testDF: DataFrame,
explicitTreeMethod: String,
explicitNumWorkers: Int,
round: Int = 5): Unit = {
val paramMap = Map(
"eta" -> "1",
"max_depth" -> "6",
"silent" -> "1",
"objective" -> "reg:squarederror",
"max_bin" -> 16,
"tree_method" -> treeMethod)
"tree_method" -> explicitTreeMethod)

val model1 = ScalaXGBoost.train(trainingDM, paramMap, round)
val prediction1 = model1.predict(testDM)

val model2 = new XGBoostRegressor(paramMap ++ Array("num_round" -> round,
"num_workers" -> numWorkers)).fit(trainingDF)
"num_workers" -> explicitNumWorkers)).fit(trainingDF)

val prediction2 = model2.transform(testDF).
collect().map(row => (row.getAs[Int]("id"), row.getAs[Double]("prediction"))).toMap
@@ -130,8 +150,9 @@ abstract class XGBoostRegressorSuiteBase extends FunSuite with PerTest {
}

test("use weight") {
// Hist method output is different when num_workers > 1
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers,
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> 1,
"tree_method" -> treeMethod)

val getWeightFromId = udf({id: Int => if (id == 0) 1.0f else 0.001f})
@@ -145,6 +166,22 @@ abstract class XGBoostRegressorSuiteBase extends FunSuite with PerTest {
prediction.foreach(x => assert(math.abs(x.getAs[Double]("prediction") - first) <= 0.01f))
}

test("use weight (distributed)") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers,
"tree_method" -> "approx")

val getWeightFromId = udf({id: Int => if (id == 0) 1.0f else 0.001f})
val trainingDF = buildDataFrame(Regression.train)
.withColumn("weight", getWeightFromId(col("id")))
val testDF = buildDataFrame(Regression.test)

val model = new XGBoostRegressor(paramMap).setWeightCol("weight").fit(trainingDF)
val prediction = model.transform(testDF).collect()
val first = prediction.head.getAs[Double]("prediction")
prediction.foreach(x => assert(math.abs(x.getAs[Double]("prediction") - first) <= 0.01f))
}

test("test predictionLeaf") {
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers,
2 changes: 1 addition & 1 deletion python-package/setup.py
@@ -302,7 +302,7 @@ def run(self):

with open(os.path.join(CURRENT_DIR, 'README.rst'), encoding='utf-8') as fd:
description = fd.read()
with open(os.path.join(CURRENT_DIR, 'xgboost/VERSION')) as fd:
with open(os.path.join(CURRENT_DIR, 'xgboost/VERSION'), encoding="ascii") as fd:
version = fd.read().strip()

setup(name='xgboost',
2 changes: 1 addition & 1 deletion python-package/xgboost/__init__.py
@@ -22,7 +22,7 @@
pass

VERSION_FILE = os.path.join(os.path.dirname(__file__), 'VERSION')
with open(VERSION_FILE) as f:
with open(VERSION_FILE, encoding="ascii") as f:
__version__ = f.read().strip()

__all__ = ['DMatrix', 'DeviceQuantileDMatrix', 'Booster', 'DataIter',