diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index e2514d85653a..ebcb8573991e 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -48,6 +48,7 @@ void HistogramCuts::Build(DMatrix* dmat, uint32_t const max_num_bins) { DenseCuts cuts(this); cuts.Build(dmat, max_num_bins); } + LOG(INFO) << "Total number of hist bins: " << cut_ptrs_.back(); } bool CutsBuilder::UseGroup(DMatrix* dmat) { diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 03168d2bd482..b5a2637afcbd 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -556,7 +556,7 @@ void QuantileHistMaker::Builder::BuildHistsBatch(const std::vector& reinterpret_cast(gpair.data()); // 2. Build partial histograms for each node - #pragma omp parallel for schedule(guided) + #pragma omp parallel for schedule(static) for (int32_t itask = 0; itask < n_hist_buidling_tasks; ++itask) { const size_t tid = omp_get_thread_num(); const int32_t nid = task_nid[itask]; @@ -856,7 +856,7 @@ bool QuantileHistMaker::Builder::UpdatePredictionCache( } } -#pragma omp parallel for schedule(guided) +#pragma omp parallel for schedule(static) for (omp_ulong k = 0; k < tasks_elem.size(); ++k) { const RowSetCollection::Elem rowset = tasks_elem[k]; if (rowset.begin != nullptr && rowset.end != nullptr && rowset.node_id != -1) { @@ -1079,7 +1079,7 @@ void QuantileHistMaker::Builder::EvaluateSplitsBatch( // partial results std::vector> splits(tasks.size()); // parallel enumeration -#pragma omp parallel for schedule(guided) + #pragma omp parallel for schedule(static) for (omp_ulong i = 0; i < tasks.size(); ++i) { // node_idx : offset within `nodes` list const int32_t node_idx = tasks[i].first; diff --git a/tests/cpp/tree/test_quantile_hist.cc b/tests/cpp/tree/test_quantile_hist.cc index a5370d17e61d..38a7e712bf5c 100644 --- a/tests/cpp/tree/test_quantile_hist.cc +++ b/tests/cpp/tree/test_quantile_hist.cc @@ -225,6 +225,14 @@ class QuantileHistMock : public QuantileHistMaker { delete dmat; } + + void TestEvaluateSplitParallel(const GHistIndexBlockMatrix &quantile_index_block, + const RegTree &tree) { + omp_set_num_threads(2); + TestEvaluateSplit(quantile_index_block, tree); + omp_set_num_threads(1); + } + }; int static constexpr kNRows = 8, kNCols = 16; diff --git a/tests/python/test_openmp.py b/tests/python/test_openmp.py index eb73daf2c934..d26f7f29319c 100644 --- a/tests/python/test_openmp.py +++ b/tests/python/test_openmp.py @@ -1,43 +1,74 @@ # -*- coding: utf-8 -*- -from scipy.sparse import csr_matrix import xgboost as xgb import unittest +import numpy as np class TestOMP(unittest.TestCase): def test_omp(self): - # a contrived example where one node has an instance set of size 2. - data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - indices = [2, 1, 1, 2, 0, 0, 2, 0, 1, 3] - indptr = [0, 1, 2, 4, 5, 7, 9, 10] - A = csr_matrix((data, indices, indptr), shape=(7, 4)) - y = [1, 1, 0, 0, 0, 1, 1] - dtrain = xgb.DMatrix(A, label=y) - - # 1. use 3 threads to train a tree with an instance set of size 2 + dpath = 'demo/data/' + dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') + dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') + param = {'booster': 'gbtree', 'objective': 'binary:logistic', - 'grow_policy': 'lossguide', + 'grow_policy': 'depthwise', 'tree_method': 'hist', - 'eval_metric': 'auc', - 'max_depth': 0, - 'max_leaves': 1024, - 'min_child_weight': 0, - 'nthread': 3} - - watchlist = [(dtrain, 'train')] - num_round = 1 - res = {} - xgb.train(param, dtrain, num_round, watchlist, evals_result=res) - assert res['train']['auc'][-1] > 0.99 - - # 2. vary number of threads and test whether you get the same result + 'eval_metric': 'error', + 'max_depth': 5, + 'min_child_weight': 0} + + watchlist = [(dtest, 'eval'), (dtrain, 'train')] + num_round = 5 + + def run_trial(): + res = {} + bst = xgb.train(param, dtrain, num_round, watchlist, evals_result=res) + metrics = [res['train']['error'][-1], res['eval']['error'][-1]] + preds = bst.predict(dtest) + return metrics, preds + + def consist_test(title, n): + auc, pred = run_trial() + for i in range(n-1): + auc2, pred2 = run_trial() + try: + assert auc == auc2 + assert np.array_equal(pred, pred2) + except Exception as e: + print('-------test %s failed, num_trial: %d-------' % (title, i)) + raise e + auc, pred = auc2, pred2 + return auc, pred + + print('test approx ...') + param['tree_method'] = 'approx' + param['nthread'] = 1 - res2 = {} - xgb.train(param, dtrain, num_round, watchlist, evals_result=res2) - assert res['train']['auc'][-1] == res2['train']['auc'][-1] + auc_1, pred_1 = consist_test('approx_thread_1', 100) param['nthread'] = 2 - res3 = {} - xgb.train(param, dtrain, num_round, watchlist, evals_result=res3) - assert res['train']['auc'][-1] == res3['train']['auc'][-1] + auc_2, pred_2 = consist_test('approx_thread_2', 100) + + param['nthread'] = 3 + auc_3, pred_3 = consist_test('approx_thread_3', 100) + + assert auc_1 == auc_2 == auc_3 + assert np.array_equal(auc_1, auc_2) + assert np.array_equal(auc_1, auc_3) + + print('test hist ...') + param['tree_method'] = 'hist' + + param['nthread'] = 1 + auc_1, pred_1 = consist_test('hist_thread_1', 100) + + param['nthread'] = 2 + auc_2, pred_2 = consist_test('hist_thread_2', 100) + + param['nthread'] = 3 + auc_3, pred_3 = consist_test('hist_thread_3', 100) + + assert auc_1 == auc_2 == auc_3 + assert np.array_equal(auc_1, auc_2) + assert np.array_equal(auc_1, auc_3)