From 0d46ffdb4eaa834c7aa6d3328bdcb6eae89c437b Mon Sep 17 00:00:00 2001 From: fis Date: Thu, 26 Mar 2020 16:35:13 +0800 Subject: [PATCH] cleanup. --- src/predictor/cpu_predictor.cc | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 204fdff9e4ff..2fc15731df20 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -42,9 +42,11 @@ bst_float PredValue(const SparsePage::Inst &inst, return psum; } +template struct SparsePageView { SparsePage const* page; bst_row_t base_rowid; + static size_t constexpr kUnroll = kUnrollLen; explicit SparsePageView(SparsePage const *p) : page{p}, base_rowid{page->base_rowid} { @@ -56,13 +58,16 @@ struct SparsePageView { size_t Size() const { return page->Size(); } }; -template +template class AdapterView { Adapter* adapter_; float missing_; common::Span workspace_; std::vector current_unroll_; + public: + static size_t constexpr kUnroll = kUnrollLen; + public: explicit AdapterView(Adapter *adapter, float missing, common::Span workplace) @@ -108,8 +113,8 @@ void PredictBatchKernel(DataView batch, std::vector *out_preds, CHECK_EQ(model.param.size_leaf_vector, 0) << "size_leaf_vector is enforced to 0 so far"; // parallel over local batch - constexpr int kUnroll = 8; const auto nsize = static_cast(batch.Size()); + auto constexpr kUnroll = DataView::kUnroll; const bst_omp_uint rest = nsize % kUnroll; if (nsize >= kUnroll) { #pragma omp parallel for schedule(static) @@ -118,13 +123,13 @@ void PredictBatchKernel(DataView batch, std::vector *out_preds, RegTree::FVec &feats = thread_temp[tid]; int64_t ridx[kUnroll]; SparsePage::Inst inst[kUnroll]; - for (int k = 0; k < kUnroll; ++k) { + for (size_t k = 0; k < kUnroll; ++k) { ridx[k] = static_cast(batch.base_rowid + i + k); } - for (int k = 0; k < kUnroll; ++k) { + for (size_t k = 0; k < kUnroll; ++k) { inst[k] = batch[i + k]; } - for (int k = 0; k < kUnroll; ++k) { + for (size_t k = 0; k < kUnroll; ++k) { for (int gid = 0; gid < num_group; ++gid) { const size_t offset = ridx[k] * num_group + gid; preds[offset] += PredValue(inst[k], model.trees, model.tree_info, gid, @@ -167,7 +172,8 @@ class CPUPredictor : public Predictor { for (auto const& batch : p_fmat->GetBatches()) { CHECK_EQ(out_preds->size(), p_fmat->Info().num_row_ * model.learner_model_param_->num_output_group); - PredictBatchKernel(SparsePageView{&batch}, out_preds, model, tree_begin, + size_t constexpr kUnroll = 8; + PredictBatchKernel(SparsePageView{&batch}, out_preds, model, tree_begin, tree_end, &thread_temp_); } } @@ -265,7 +271,6 @@ class CPUPredictor : public Predictor { PredictionCacheEntry *out_preds, uint32_t tree_begin, uint32_t tree_end) const { auto threads = omp_get_max_threads(); - size_t constexpr kUnroll = 8; auto m = dmlc::get(x); CHECK_EQ(m.NumColumns(), model.learner_model_param_->num_feature) << "Number of columns in data must equal to trained model."; @@ -277,6 +282,7 @@ class CPUPredictor : public Predictor { auto &predictions = out_preds->predictions.HostVector(); std::vector thread_temp; InitThreadTemp(threads, model.learner_model_param_->num_feature, &thread_temp); + size_t constexpr kUnroll = 8; PredictBatchKernel(AdapterView( &m, missing, common::Span{workspace}), &predictions, model, tree_begin, tree_end, &thread_temp);