[WIP] Clarify default number of threads. #4975

Closed · wants to merge 1 commit

6 changes: 4 additions & 2 deletions doc/parameter.rst
```diff
@@ -34,9 +34,11 @@ General Parameters
   configurations based on heuristics, which is displayed as warning message.
   If there's unexpected behaviour, please try to increase value of verbosity.

-* ``nthread`` [default to maximum number of threads available if not set]
+* ``nthread`` [default to half the number of processors available if not set]

-  - Number of parallel threads used to run XGBoost
+  - Number of parallel threads used to run XGBoost; only used if XGBoost is
+    compiled with OpenMP (default). Internally uses ``omp_get_num_procs`` to
+    determine the number of processors online on the current device.

 * ``disable_default_eval_metric`` [default=0]
```
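
For reference, the default described above can be reproduced in isolation. A minimal sketch, assuming XGBoost is compiled with OpenMP; the hypothetical `DefaultThreads` mirrors the `common::OmpDefaultThreads` helper introduced later in this diff:

```cpp
#include <algorithm>
#include <cstdio>
#include <omp.h>

// Any requested value <= 0 falls back to half the processors currently
// online, but never fewer than one thread.
int DefaultThreads(int nthread) {
  if (nthread <= 0) {
    nthread = std::max(omp_get_num_procs() / 2, 1);
  }
  return nthread;
}

int main() {
  std::printf("nthread=0  -> %d threads\n", DefaultThreads(0));
  std::printf("nthread=-1 -> %d threads\n", DefaultThreads(-1));
  std::printf("nthread=8  -> %d threads\n", DefaultThreads(8));
  return 0;
}
```
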
5 changes: 3 additions & 2 deletions include/xgboost/generic_parameters.h
```diff
@@ -41,8 +41,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
         "Seed PRNG determnisticly via iterator number, "
         "this option will be switched on automatically on distributed "
         "mode.");
-    DMLC_DECLARE_FIELD(nthread).set_default(0).describe(
-        "Number of threads to use.");
+    DMLC_DECLARE_FIELD(nthread).set_default(0)
+        .set_lower_bound(-1)
+        .describe("Number of threads to use.");
     DMLC_DECLARE_ALIAS(nthread, n_jobs);

     DMLC_DECLARE_FIELD(gpu_id)
```
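
The new `set_lower_bound(-1)` means out-of-range values are rejected by the parameter machinery itself rather than silently accepted. A minimal sketch of that behavior with a standalone dmlc-core parameter; `DemoParam` is a hypothetical stand-in for `GenericParameter`:

```cpp
#include <dmlc/parameter.h>

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in declaring nthread exactly as the patch does.
struct DemoParam : public dmlc::Parameter<DemoParam> {
  int nthread;
  DMLC_DECLARE_PARAMETER(DemoParam) {
    DMLC_DECLARE_FIELD(nthread).set_default(0)
        .set_lower_bound(-1)
        .describe("Number of threads to use.");
  }
};
DMLC_REGISTER_PARAMETER(DemoParam);

int main() {
  DemoParam param;
  std::vector<std::pair<std::string, std::string>> kwargs{{"nthread", "-2"}};
  try {
    param.Init(kwargs);  // -2 is below the declared lower bound of -1
  } catch (const dmlc::ParamError& e) {
    std::cout << "rejected: " << e.what() << "\n";
  }
  return 0;
}
```
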
5 changes: 3 additions & 2 deletions python-package/xgboost/core.py
```diff
@@ -469,8 +469,9 @@ def __init__(self, data, label=None, missing=None,
         feature_types : list, optional
             Set types for features.
         nthread : integer, optional
-            Number of threads to use for loading data from numpy array. If -1,
-            uses maximum threads available on the system.
+            Number of threads to use for loading data from numpy array. If -1
+            or 0, uses half the number of processors available on the system.

         """
         # force into void_p, mac need to pass things in as void_p
         if data is None:
```
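
At the C level this corresponds to the `nthread` argument of `XGDMatrixCreateFromMat_omp`. A minimal sketch of a caller that relies on the new default, assuming the XGBoost C API headers and library are available:

```cpp
#include <xgboost/c_api.h>

#include <cstdio>

int main() {
  const float data[6] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};  // 2 rows x 3 columns
  DMatrixHandle dmat = nullptr;
  // nthread = -1: let XGBoost pick the default (half the online processors).
  if (XGDMatrixCreateFromMat_omp(data, 2, 3, /*missing=*/0.0f, &dmat, -1) != 0) {
    std::fprintf(stderr, "error: %s\n", XGBGetLastError());
    return 1;
  }
  XGDMatrixFree(dmat);
  return 0;
}
```
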
12 changes: 8 additions & 4 deletions python-package/xgboost/sklearn.py
```diff
@@ -73,9 +73,11 @@ class XGBModel(XGBModelBase):
         available. It's recommended to study this option from parameters
         document.
     n_jobs : int
-        Number of parallel threads used to run xgboost.
+        Number of parallel threads used to run xgboost. Setting it to 0 or -1
+        uses half the number of processors available on the system.
     gamma : float
-        Minimum loss reduction required to make a further partition on a leaf node of the tree.
+        Minimum loss reduction required to make a further partition on a leaf
+        node of the tree.
     min_child_weight : int
         Minimum sum of instance weight(hessian) needed in a child.
     max_delta_step : int
@@ -934,9 +936,11 @@ class XGBRanker(XGBModel):
     booster: string
         Specify which booster to use: gbtree, gblinear or dart.
     n_jobs : int
-        Number of parallel threads used to run xgboost.
+        Number of parallel threads used to run xgboost. Setting it to 0 or -1
+        uses half the number of processors available on the system.
     gamma : float
-        Minimum loss reduction required to make a further partition on a leaf node of the tree.
+        Minimum loss reduction required to make a further partition on a
+        leaf node of the tree.
     min_child_weight : int
         Minimum sum of instance weight(hessian) needed in a child.
     max_delta_step : int
```
9 changes: 4 additions & 5 deletions src/c_api/c_api.cc
```diff
@@ -18,6 +18,7 @@

 #include "c_api_error.h"
 #include "../data/simple_csr_source.h"
+#include "../common/common.h"
 #include "../common/math.h"
 #include "../common/io.h"
 #include "../common/group_data.h"
@@ -404,9 +405,8 @@ XGB_DLL int XGDMatrixCreateFromMat_omp(const bst_float* data, // NOLINT
   }

   API_BEGIN();
-  const int nthreadmax = std::max(omp_get_num_procs() / 2 - 1, 1);
-  // const int nthreadmax = omp_get_max_threads();
-  if (nthread <= 0) nthread=nthreadmax;
+  nthread = common::OmpDefaultThreads(nthread);

   int nthread_orig = omp_get_max_threads();
   omp_set_num_threads(nthread);
@@ -557,8 +557,7 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data, const char** feature_stypes,
   }

   API_BEGIN();
-  const int nthreadmax = std::max(omp_get_num_procs() / 2 - 1, 1);
-  if (nthread <= 0) nthread = nthreadmax;
+  nthread = common::OmpDefaultThreads(nthread);
   int nthread_orig = omp_get_max_threads();
   omp_set_num_threads(nthread);
```
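
Both call sites keep the existing save/set/restore discipline around their parallel sections; only the fallback computation moves into the shared helper. A minimal sketch of that pattern, assuming OpenMP; `ParallelWork` is a hypothetical stand-in for the data-loading loop:

```cpp
#include <algorithm>

#include <omp.h>

// Hypothetical stand-in for the OpenMP loop inside XGDMatrixCreateFromMat_omp.
static void ParallelWork() {
#pragma omp parallel
  { /* per-thread work */ }
}

void RunWithThreads(int nthread) {
  if (nthread <= 0) {  // same fallback as common::OmpDefaultThreads
    nthread = std::max(omp_get_num_procs() / 2, 1);
  }
  int nthread_orig = omp_get_max_threads();  // save the caller's setting
  omp_set_num_threads(nthread);              // pin it for this call only
  ParallelWork();
  omp_set_num_threads(nthread_orig);         // restore it on the way out
}
```
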
16 changes: 14 additions & 2 deletions src/common/common.h
```diff
@@ -6,16 +6,19 @@
 #ifndef XGBOOST_COMMON_COMMON_H_
 #define XGBOOST_COMMON_COMMON_H_

-#include <xgboost/base.h>
-#include <xgboost/logging.h>
+#include <dmlc/omp.h>

 #include <algorithm>
 #include <exception>
 #include <limits>
+#include <type_traits>
 #include <vector>
 #include <string>
 #include <sstream>

+#include "xgboost/base.h"
+#include "xgboost/logging.h"
+
 #if defined(__CUDACC__)
 #include <thrust/system/cuda/error.h>
 #include <thrust/system_error.h>
@@ -142,6 +145,15 @@ class Range {
 };

 int AllVisibleGPUs();
+
+inline int OmpDefaultThreads(int32_t threads) {
+  if (threads <= 0) {
+    threads = std::max(omp_get_num_procs() / 2, 1);
+  }
+  return threads;
+}
+
+// int
 }  // namespace common
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_COMMON_H_
```

> **Contributor** (review comment on `OmpDefaultThreads`): I suggest making this a non-inline function and exposing it in the C API and Python API.
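
Following up on that suggestion, an out-of-line definition with a C API entry point might look like the sketch below. This is hypothetical: `XGBoostGetDefaultNThread` is not part of the actual C API, and the file placement is illustrative only.

```cpp
// common.cc -- hypothetical out-of-line definition (the inline version in
// common.h would become a plain declaration).
#include <algorithm>

#include <dmlc/omp.h>

namespace xgboost {
namespace common {
int OmpDefaultThreads(int32_t threads) {
  if (threads <= 0) {
    threads = std::max(omp_get_num_procs() / 2, 1);
  }
  return threads;
}
}  // namespace common
}  // namespace xgboost

// c_api.cc -- hypothetical wrapper so language bindings could query the
// effective default instead of re-implementing the heuristic.
XGB_DLL int XGBoostGetDefaultNThread(int nthread, int* out) {
  API_BEGIN();
  *out = xgboost::common::OmpDefaultThreads(nthread);
  API_END();
}
```
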
4 changes: 2 additions & 2 deletions src/data/ellpack_page_source.cc
```diff
@@ -29,14 +29,14 @@ bool EllpackPageSource::Next() {
 EllpackPage& EllpackPageSource::Value() {
   LOG(FATAL) << "Internal Error: "
                 "XGBoost is not compiled with CUDA but EllpackPageSource is required";
-  EllpackPage* page;
+  EllpackPage* page {nullptr};
   return *page;
 }

 const EllpackPage& EllpackPageSource::Value() const {
   LOG(FATAL) << "Internal Error: "
                 "XGBoost is not compiled with CUDA but EllpackPageSource is required";
-  EllpackPage* page;
+  EllpackPage* page {nullptr};
   return *page;
 }

```
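
These stubs exist only to satisfy the linker in non-CUDA builds: `LOG(FATAL)` aborts before the `return` runs, so the `{nullptr}` initializer merely stops compilers from flagging an uninitialized read on the dead path. A minimal standalone sketch of the same pattern, with `std::abort` standing in for `LOG(FATAL)`:

```cpp
#include <cstdio>
#include <cstdlib>

struct PageStub {};  // hypothetical stand-in for EllpackPage

PageStub& Value() {
  std::fprintf(stderr, "not compiled with CUDA\n");
  std::abort();             // never returns, like LOG(FATAL)
  PageStub* page{nullptr};  // dead code: only present so the non-void
  return *page;             // signature compiles without warnings
}

int main() { return 0; }  // Value() is deliberately never called
```
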
6 changes: 3 additions & 3 deletions src/learner.cc
```diff
@@ -162,9 +162,9 @@ class LearnerImpl : public Learner {
     generic_param_.CheckDeprecated();

     ConsoleLogger::Configure(args);
-    if (generic_param_.nthread != 0) {
-      omp_set_num_threads(generic_param_.nthread);
-    }
+
+    auto threads = common::OmpDefaultThreads(generic_param_.nthread);
+    omp_set_num_threads(threads);

     // add additional parameters
     // These are cosntraints that need to be satisfied.
```
35 changes: 31 additions & 4 deletions tests/cpp/test_learner.cc
```diff
@@ -1,11 +1,13 @@
 // Copyright by Contributors
 #include <gtest/gtest.h>
-#include <vector>
-#include "helpers.h"
 #include <dmlc/filesystem.h>

-#include <xgboost/learner.h>
 #include <xgboost/version_config.h>
+#include <xgboost/learner.h>
+
+#include <vector>
+
+#include "helpers.h"
+#include "../../src/common/common.h"

 namespace xgboost {
@@ -28,6 +30,31 @@ TEST(Learner, Basic) {
   static_assert(std::is_integral<decltype(patch)>::value, "Wrong patch version type");
 }

+#if defined(_OPENMP)
+TEST(Learner, Threads) {
+  auto mat_ptr = CreateDMatrix(10, 10, 0);
+  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {*mat_ptr};
+  auto ori_threads = omp_get_max_threads();
+
+  auto test_set_threads =
+      [&](int threads) {
+        auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
+        auto args = Args{{"nthread", std::to_string(threads)}};
+        learner->SetParams(args);
+        learner->Configure();
+        auto nthreads = omp_get_max_threads();
+        ASSERT_EQ(nthreads, common::OmpDefaultThreads(threads));
+      };
+
+  test_set_threads(0);
+  test_set_threads(-1);
+  test_set_threads(8);
+
+  omp_set_num_threads(ori_threads);
+  delete mat_ptr;
+}
+#endif  // defined(_OPENMP)
+
 TEST(Learner, CheckGroup) {
   using Arg = std::pair<std::string, std::string>;
   size_t constexpr kNumGroups = 4;
```