[WIP] Clarify default number of threads. #4975

Closed · wants to merge 1 commit

6 changes: 4 additions & 2 deletions doc/parameter.rst
```diff
@@ -34,9 +34,11 @@ General Parameters
   configurations based on heuristics, which is displayed as warning message.
   If there's unexpected behaviour, please try to increase value of verbosity.

-* ``nthread`` [default to maximum number of threads available if not set]
+* ``nthread`` [default to half the number of processors available if not set]

-  - Number of parallel threads used to run XGBoost
+  - Number of parallel threads used to run XGBoost; only used if XGBoost is
+    compiled with OpenMP (default). Internally uses ``omp_get_num_procs`` to
+    determine the number of processors online on the current device.

 * ``disable_default_eval_metric`` [default=0]
```
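
For reference, the default described above can be reproduced in isolation. A minimal sketch, assuming XGBoost is compiled with OpenMP; the hypothetical `DefaultThreads` mirrors the `common::OmpDefaultThreads` helper introduced later in this diff:

```cpp
#include <algorithm>
#include <cstdio>
#include <omp.h>

// Any requested value <= 0 falls back to half the processors currently
// online, but never fewer than one thread.
int DefaultThreads(int nthread) {
  if (nthread <= 0) {
    nthread = std::max(omp_get_num_procs() / 2, 1);
  }
  return nthread;
}

int main() {
  std::printf("nthread=0  -> %d threads\n", DefaultThreads(0));
  std::printf("nthread=-1 -> %d threads\n", DefaultThreads(-1));
  std::printf("nthread=8  -> %d threads\n", DefaultThreads(8));
  return 0;
}
```
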
5 changes: 3 additions & 2 deletions include/xgboost/generic_parameters.h
```diff
@@ -41,8 +41,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
         "Seed PRNG determnisticly via iterator number, "
         "this option will be switched on automatically on distributed "
         "mode.");
-    DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
-        "Number of threads to use.");
+    DMLC_DECLARE_FIELD(nthread).set_default(0)
+        .set_lower_bound(-1)
+        .describe("Number of threads to use.");
     DMLC_DECLARE_ALIAS(nthread, n_jobs);

     DMLC_DECLARE_FIELD(gpu_id)
```
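
The new `set_lower_bound(-1)` means out-of-range values are rejected by the parameter machinery itself rather than silently accepted. A minimal sketch of that behavior with a standalone dmlc-core parameter; `DemoParam` is a hypothetical stand-in for `GenericParameter`:

```cpp
#include <dmlc/parameter.h>

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in declaring nthread exactly as the patch does.
struct DemoParam : public dmlc::Parameter<DemoParam> {
  int nthread;
  DMLC_DECLARE_PARAMETER(DemoParam) {
    DMLC_DECLARE_FIELD(nthread).set_default(0)
        .set_lower_bound(-1)
        .describe("Number of threads to use.");
  }
};
DMLC_REGISTER_PARAMETER(DemoParam);

int main() {
  DemoParam param;
  std::vector<std::pair<std::string, std::string>> kwargs{{"nthread", "-2"}};
  try {
    param.Init(kwargs);  // -2 is below the declared lower bound of -1
  } catch (const dmlc::ParamError& e) {
    std::cout << "rejected: " << e.what() << "\n";
  }
  return 0;
}
```
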
5 changes: 3 additions & 2 deletions python-package/xgboost/core.py
```diff
@@ -469,8 +469,9 @@ def __init__(self, data, label=None, missing=None,
         feature_types : list, optional
             Set types for features.
         nthread : integer, optional
-            Number of threads to use for loading data from numpy array. If -1,
-            uses maximum threads available on the system.
+            Number of threads to use for loading data from numpy array. If -1
+            or 0, uses half the number of processors available on the system.

         """
         # force into void_p, mac need to pass things in as void_p
         if data is None:
```
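
At the C level this corresponds to the `nthread` argument of `XGDMatrixCreateFromMat_omp`. A minimal sketch of a caller that relies on the new default, assuming the XGBoost C API headers and library are available:

```cpp
#include <xgboost/c_api.h>

#include <cstdio>

int main() {
  const float data[6] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};  // 2 rows x 3 columns
  DMatrixHandle dmat = nullptr;
  // nthread = -1: let XGBoost pick the default (half the online processors).
  if (XGDMatrixCreateFromMat_omp(data, 2, 3, /*missing=*/0.0f, &dmat, -1) != 0) {
    std::fprintf(stderr, "error: %s\n", XGBGetLastError());
    return 1;
  }
  XGDMatrixFree(dmat);
  return 0;
}
```
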
12 changes: 8 additions & 4 deletions python-package/xgboost/sklearn.py
```diff
@@ -73,9 +73,11 @@ class XGBModel(XGBModelBase):
         available. It's recommended to study this option from parameters
         document.
     n_jobs : int
-        Number of parallel threads used to run xgboost.
+        Number of parallel threads used to run xgboost. Setting it to 0 or -1
+        uses half the number of processors available on the system.
     gamma : float
-        Minimum loss reduction required to make a further partition on a leaf node of the tree.
+        Minimum loss reduction required to make a further partition on a leaf
+        node of the tree.
     min_child_weight : int
         Minimum sum of instance weight(hessian) needed in a child.
     max_delta_step : int
@@ -934,9 +936,11 @@ class XGBRanker(XGBModel):
     booster: string
         Specify which booster to use: gbtree, gblinear or dart.
     n_jobs : int
-        Number of parallel threads used to run xgboost.
+        Number of parallel threads used to run xgboost. Setting it to 0 or -1
+        uses half the number of processors available on the system.
     gamma : float
-        Minimum loss reduction required to make a further partition on a leaf node of the tree.
+        Minimum loss reduction required to make a further partition on a
+        leaf node of the tree.
     min_child_weight : int
         Minimum sum of instance weight(hessian) needed in a child.
     max_delta_step : int
```
9 changes: 4 additions & 5 deletions src/c_api/c_api.cc
```diff
@@ -18,6 +18,7 @@

 #include "c_api_error.h"
 #include "../data/simple_csr_source.h"
+#include "../common/common.h"
 #include "../common/math.h"
 #include "../common/io.h"
 #include "../common/group_data.h"
@@ -404,9 +405,8 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, // NOLINT
   }

   API_BEGIN();
-  const int nthreadmax = std::max(omp_get_num_procs() / 2 - 1, 1);
-  // const int nthreadmax = omp_get_max_threads();
-  if (nthread <= 0) nthread=nthreadmax;
+  nthread = common::OmpDefaultThreads(nthread);

   int nthread_orig = omp_get_max_threads();
   omp_set_num_threads(nthread);
@@ -557,8 +557,7 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data, const char** feature_stypes,
   }

   API_BEGIN();
-  const int nthreadmax = std::max(omp_get_num_procs() / 2 - 1, 1);
-  if (nthread <= 0) nthread = nthreadmax;
+  nthread = common::OmpDefaultThreads(nthread);
   int nthread_orig = omp_get_max_threads();
   omp_set_num_threads(nthread);
```
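
Both call sites keep the existing save/set/restore discipline around their parallel sections; only the fallback computation moves into the shared helper. A minimal sketch of that pattern, assuming OpenMP; `ParallelWork` is a hypothetical stand-in for the data-loading loop:

```cpp
#include <algorithm>

#include <omp.h>

// Hypothetical stand-in for the OpenMP loop inside XGDMatrixCreateFromMat_omp.
static void ParallelWork() {
#pragma omp parallel
  { /* per-thread work */ }
}

void RunWithThreads(int nthread) {
  if (nthread <= 0) {  // same fallback as common::OmpDefaultThreads
    nthread = std::max(omp_get_num_procs() / 2, 1);
  }
  int nthread_orig = omp_get_max_threads();  // save the caller's setting
  omp_set_num_threads(nthread);              // pin it for this call only
  ParallelWork();
  omp_set_num_threads(nthread_orig);         // restore it on the way out
}
```
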
16 changes: 14 additions & 2 deletions src/common/common.h
```diff
@@ -6,16 +6,19 @@
 #ifndef XGBOOST_COMMON_COMMON_H_
 #define XGBOOST_COMMON_COMMON_H_

-#include <xgboost/base.h>
-#include <xgboost/logging.h>
+#include <dmlc/omp.h>

 #include <algorithm>
 #include <exception>
 #include <limits>
+#include <type_traits>
 #include <vector>
 #include <string>
 #include <sstream>

+#include "xgboost/base.h"
+#include "xgboost/logging.h"
+
 #if defined(__CUDACC__)
 #include <thrust/system/cuda/error.h>
 #include <thrust/system_error.h>
@@ -142,6 +145,15 @@ class Range {
 };

 int AllVisibleGPUs();
+
+inline int OmpDefaultThreads(int32_t threads) {
+  if (threads <= 0) {
+    threads = std::max(omp_get_num_procs() / 2, 1);
+  }
+  return threads;
+}
+
+// int
 }  // namespace common
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_COMMON_H_
```

> **Contributor** (review comment on `OmpDefaultThreads`): I suggest making this a non-inline function and exposing it in the C API and Python API.
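
Following up on that suggestion, an out-of-line definition with a C API entry point might look like the sketch below. This is hypothetical: `XGBoostGetDefaultNThread` is not part of the actual C API, and the file placement is illustrative only.

```cpp
// common.cc -- hypothetical out-of-line definition (the inline version in
// common.h would become a plain declaration).
#include <algorithm>

#include <dmlc/omp.h>

namespace xgboost {
namespace common {
int OmpDefaultThreads(int32_t threads) {
  if (threads <= 0) {
    threads = std::max(omp_get_num_procs() / 2, 1);
  }
  return threads;
}
}  // namespace common
}  // namespace xgboost

// c_api.cc -- hypothetical wrapper so language bindings could query the
// effective default instead of re-implementing the heuristic.
XGB_DLL int XGBoostGetDefaultNThread(int nthread, int* out) {
  API_BEGIN();
  *out = xgboost::common::OmpDefaultThreads(nthread);
  API_END();
}
```
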
4 changes: 2 additions & 2 deletions src/data/ellpack_page_source.cc
```diff
@@ -29,14 +29,14 @@ bool EllpackPageSource::Next() {
 EllpackPage& EllpackPageSource::Value() {
   LOG(FATAL) << "Internal Error: "
                 "XGBoost is not compiled with CUDA but EllpackPageSource is required";
-  EllpackPage* page;
+  EllpackPage* page {nullptr};
   return *page;
 }

 const EllpackPage& EllpackPageSource::Value() const {
   LOG(FATAL) << "Internal Error: "
                 "XGBoost is not compiled with CUDA but EllpackPageSource is required";
-  EllpackPage* page;
+  EllpackPage* page {nullptr};
   return *page;
 }

```
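
These stubs exist only to satisfy the linker in non-CUDA builds: `LOG(FATAL)` aborts before the `return` runs, so the `{nullptr}` initializer merely stops compilers from flagging an uninitialized read on the dead path. A minimal standalone sketch of the same pattern, with `std::abort` standing in for `LOG(FATAL)`:

```cpp
#include <cstdio>
#include <cstdlib>

struct PageStub {};  // hypothetical stand-in for EllpackPage

PageStub& Value() {
  std::fprintf(stderr, "not compiled with CUDA\n");
  std::abort();             // never returns, like LOG(FATAL)
  PageStub* page{nullptr};  // dead code: only present so the non-void
  return *page;             // signature compiles without warnings
}

int main() { return 0; }  // Value() is deliberately never called
```
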
6 changes: 3 additions & 3 deletions src/learner.cc
```diff
@@ -162,9 +162,9 @@ class LearnerImpl : public Learner {
     generic_param_.CheckDeprecated();

     ConsoleLogger::Configure(args);
-    if (generic_param_.nthread != 0) {
-      omp_set_num_threads(generic_param_.nthread);
-    }
+
+    auto threads = common::OmpDefaultThreads(generic_param_.nthread);
+    omp_set_num_threads(threads);

     // add additional parameters
     // These are cosntraints that need to be satisfied.
```
35 changes: 31 additions & 4 deletions tests/cpp/test_learner.cc
```diff
@@ -1,11 +1,13 @@
 // Copyright by Contributors
 #include <gtest/gtest.h>
-#include <vector>
-#include "helpers.h"
 #include <dmlc/filesystem.h>

-#include <xgboost/learner.h>
 #include <xgboost/version_config.h>
+#include <xgboost/learner.h>
+
+#include <vector>
+
+#include "helpers.h"
+#include "../../src/common/common.h"

 namespace xgboost {
@@ -28,6 +30,31 @@ TEST(Learner, Basic) {
   static_assert(std::is_integral<decltype(patch)>::value, "Wrong patch version type");
 }

+#if defined(_OPENMP)
+TEST(Learner, Threads) {
+  auto mat_ptr = CreateDMatrix(10, 10, 0);
+  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
+  auto ori_threads = omp_get_max_threads();
+
+  auto test_set_threads =
+      [&](int threads) {
+        auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
+        auto args = Args{{"nthread", std::to_string(threads)}};
+        learner->SetParams(args);
+        learner->Configure();
+        auto nthreads = omp_get_max_threads();
+        ASSERT_EQ(nthreads, common::OmpDefaultThreads(threads));
+      };
+
+  test_set_threads(0);
+  test_set_threads(-1);
+  test_set_threads(8);
+
+  omp_set_num_threads(ori_threads);
+  delete mat_ptr;
+}
+#endif  // defined(_OPENMP)
+
 TEST(Learner, CheckGroup) {
   using Arg = std::pair<std::string, std::string>;
   size_t constexpr kNumGroups = 4;
```