Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tree refactor stage I #3983

Closed
wants to merge 11 commits into the base branch from the head branch (branch names not captured in this extract)
711 changes: 162 additions & 549 deletions include/xgboost/tree_model.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion include/xgboost/tree_updater.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class TreeUpdater {
*/
virtual void Update(HostDeviceVector<GradientPair>* gpair,
DMatrix* data,
const std::vector<RegTree*>& trees) = 0;
const std::vector<RegressionTree*>& trees) = 0;

/*!
* \brief determines whether updater has enough knowledge about a given dataset
Expand Down
31 changes: 15 additions & 16 deletions src/gbm/gbtree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,11 @@ class GBTree : public GradientBooster {
void DoBoost(DMatrix* p_fmat,
HostDeviceVector<GradientPair>* in_gpair,
ObjFunction* obj) override {
std::vector<std::vector<std::unique_ptr<RegTree> > > new_trees;
std::vector<std::vector<std::unique_ptr<RegressionTree> > > new_trees;
const int ngroup = model_.param.num_output_group;
monitor_.Start("BoostNewTrees");
if (ngroup == 1) {
std::vector<std::unique_ptr<RegTree> > ret;
std::vector<std::unique_ptr<RegressionTree> > ret;
BoostNewTrees(in_gpair, p_fmat, 0, &ret);
new_trees.push_back(std::move(ret));
} else {
Expand All @@ -205,7 +205,7 @@ class GBTree : public GradientBooster {
for (bst_omp_uint i = 0; i < nsize; ++i) {
tmp_h[i] = gpair_h[i * ngroup + gid];
}
std::vector<std::unique_ptr<RegTree> > ret;
std::vector<std::unique_ptr<RegressionTree> > ret;
BoostNewTrees(&tmp, p_fmat, gid, &ret);
new_trees.push_back(std::move(ret));
}
Expand Down Expand Up @@ -273,17 +273,16 @@ class GBTree : public GradientBooster {
inline void BoostNewTrees(HostDeviceVector<GradientPair>* gpair,
DMatrix *p_fmat,
int bst_group,
std::vector<std::unique_ptr<RegTree> >* ret) {
std::vector<std::unique_ptr<RegressionTree> >* ret) {
this->InitUpdater();
std::vector<RegTree*> new_trees;
std::vector<RegressionTree*> new_trees;
ret->clear();
// create the trees
for (int i = 0; i < tparam_.num_parallel_tree; ++i) {
if (tparam_.process_type == kDefault) {
// create new tree
std::unique_ptr<RegTree> ptr(new RegTree());
std::unique_ptr<RegressionTree> ptr(new RegressionTree());
ptr->param.InitAllowUnknown(this->cfg_);
ptr->InitModel();
new_trees.push_back(ptr.get());
ret->push_back(std::move(ptr));
} else if (tparam_.process_type == kUpdate) {
Expand All @@ -303,7 +302,7 @@ class GBTree : public GradientBooster {

// commit new trees all at once
virtual void
CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) {
CommitModel(std::vector<std::vector<std::unique_ptr<RegressionTree>>>&& new_trees) {
int num_new_trees = 0;
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
num_new_trees += new_trees[gid].size();
Expand Down Expand Up @@ -368,7 +367,7 @@ class Dart : public GBTree {
unsigned root_index) override {
DropTrees(1);
if (thread_temp_.size() == 0) {
thread_temp_.resize(1, RegTree::FVec());
thread_temp_.resize(1, DenseFeatureVector());
thread_temp_[0].Init(model_.param.num_feature);
}
out_preds->resize(model_.param.num_output_group);
Expand Down Expand Up @@ -447,7 +446,7 @@ class Dart : public GBTree {
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) {
const int tid = omp_get_thread_num();
RegTree::FVec& feats = thread_temp_[tid];
DenseFeatureVector& feats = thread_temp_[tid];
int64_t ridx[kUnroll];
SparsePage::Inst inst[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
Expand All @@ -466,7 +465,7 @@ class Dart : public GBTree {
}
}
for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
RegTree::FVec& feats = thread_temp_[0];
DenseFeatureVector& feats = thread_temp_[0];
const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
const SparsePage::Inst inst = batch[i];
for (int gid = 0; gid < num_group; ++gid) {
Expand All @@ -481,7 +480,7 @@ class Dart : public GBTree {

// commit new trees all at once
void
CommitModel(std::vector<std::vector<std::unique_ptr<RegTree>>>&& new_trees) override {
CommitModel(std::vector<std::vector<std::unique_ptr<RegressionTree>>>&& new_trees) override {
int num_new_trees = 0;
for (int gid = 0; gid < model_.param.num_output_group; ++gid) {
num_new_trees += new_trees[gid].size();
Expand All @@ -498,7 +497,7 @@ class Dart : public GBTree {
inline bst_float PredValue(const SparsePage::Inst &inst,
int bst_group,
unsigned root_index,
RegTree::FVec *p_feats,
DenseFeatureVector *p_feats,
unsigned tree_begin,
unsigned tree_end) {
bst_float psum = 0.0f;
Expand All @@ -508,7 +507,7 @@ class Dart : public GBTree {
bool drop = (std::binary_search(idx_drop_.begin(), idx_drop_.end(), i));
if (!drop) {
int tid = model_.trees[i]->GetLeafIndex(*p_feats, root_index);
psum += weight_drop_[i] * (*model_.trees[i])[tid].LeafValue();
psum += weight_drop_[i] * model_.trees[i]->GetNode(tid).LeafValue();
}
}
}
Expand Down Expand Up @@ -600,7 +599,7 @@ class Dart : public GBTree {
inline void InitThreadTemp(int nthread) {
int prev_thread_temp_size = thread_temp_.size();
if (prev_thread_temp_size < nthread) {
thread_temp_.resize(nthread, RegTree::FVec());
thread_temp_.resize(nthread, DenseFeatureVector());
for (int i = prev_thread_temp_size; i < nthread; ++i) {
thread_temp_[i].Init(model_.param.num_feature);
}
Expand All @@ -615,7 +614,7 @@ class Dart : public GBTree {
// indexes of dropped trees
std::vector<size_t> idx_drop_;
// temporal storage for per thread
std::vector<RegTree::FVec> thread_temp_;
std::vector<DenseFeatureVector> thread_temp_;
};

// register the objective functions
Expand Down
8 changes: 4 additions & 4 deletions src/gbm/gbtree_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct GBTreeModel {
trees.clear();
trees_to_update.clear();
for (int i = 0; i < param.num_trees; ++i) {
std::unique_ptr<RegTree> ptr(new RegTree());
std::unique_ptr<RegressionTree> ptr(new RegressionTree());
ptr->Load(fi);
trees.push_back(std::move(ptr));
}
Expand Down Expand Up @@ -116,7 +116,7 @@ struct GBTreeModel {
}
return dump;
}
void CommitModel(std::vector<std::unique_ptr<RegTree> >&& new_trees,
void CommitModel(std::vector<std::unique_ptr<RegressionTree> >&& new_trees,
int bst_group) {
for (auto & new_tree : new_trees) {
trees.push_back(std::move(new_tree));
Expand All @@ -130,9 +130,9 @@ struct GBTreeModel {
// model parameter
GBTreeModelParam param;
/*! \brief vector of trees stored in the model */
std::vector<std::unique_ptr<RegTree> > trees;
std::vector<std::unique_ptr<RegressionTree> > trees;
/*! \brief for the update process, a place to keep the initial trees */
std::vector<std::unique_ptr<RegTree> > trees_to_update;
std::vector<std::unique_ptr<RegressionTree> > trees_to_update;
/*! \brief some information indicator of the tree, reserved */
std::vector<int> tree_info;
};
Expand Down
24 changes: 12 additions & 12 deletions src/predictor/cpu_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ DMLC_REGISTRY_FILE_TAG(cpu_predictor);
class CPUPredictor : public Predictor {
protected:
static bst_float PredValue(const SparsePage::Inst& inst,
const std::vector<std::unique_ptr<RegTree>>& trees,
const std::vector<std::unique_ptr<RegressionTree>>& trees,
const std::vector<int>& tree_info, int bst_group,
unsigned root_index, RegTree::FVec* p_feats,
unsigned root_index, DenseFeatureVector* p_feats,
unsigned tree_begin, unsigned tree_end) {
bst_float psum = 0.0f;
p_feats->Fill(inst);
for (size_t i = tree_begin; i < tree_end; ++i) {
if (tree_info[i] == bst_group) {
int tid = trees[i]->GetLeafIndex(*p_feats, root_index);
psum += (*trees[i])[tid].LeafValue();
psum += (*trees[i]).GetNode(tid).LeafValue();
}
}
p_feats->Drop(inst);
Expand All @@ -35,7 +35,7 @@ class CPUPredictor : public Predictor {
inline void InitThreadTemp(int nthread, int num_feature) {
int prev_thread_temp_size = thread_temp.size();
if (prev_thread_temp_size < nthread) {
thread_temp.resize(nthread, RegTree::FVec());
thread_temp.resize(nthread, DenseFeatureVector());
for (int i = prev_thread_temp_size; i < nthread; ++i) {
thread_temp[i].Init(num_feature);
}
Expand All @@ -61,7 +61,7 @@ class CPUPredictor : public Predictor {
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize - rest; i += kUnroll) {
const int tid = omp_get_thread_num();
RegTree::FVec& feats = thread_temp[tid];
DenseFeatureVector& feats = thread_temp[tid];
int64_t ridx[kUnroll];
SparsePage::Inst inst[kUnroll];
for (int k = 0; k < kUnroll; ++k) {
Expand All @@ -80,7 +80,7 @@ class CPUPredictor : public Predictor {
}
}
for (bst_omp_uint i = nsize - rest; i < nsize; ++i) {
RegTree::FVec& feats = thread_temp[0];
DenseFeatureVector& feats = thread_temp[0];
const auto ridx = static_cast<int64_t>(batch.base_rowid + i);
auto inst = batch[i];
for (int gid = 0; gid < num_group; ++gid) {
Expand Down Expand Up @@ -200,7 +200,7 @@ class CPUPredictor : public Predictor {
const gbm::GBTreeModel& model, unsigned ntree_limit,
unsigned root_index) override {
if (thread_temp.size() == 0) {
thread_temp.resize(1, RegTree::FVec());
thread_temp.resize(1, DenseFeatureVector());
thread_temp[0].Init(model.param.num_feature);
}
ntree_limit *= model.param.num_output_group;
Expand Down Expand Up @@ -237,7 +237,7 @@ class CPUPredictor : public Predictor {
for (bst_omp_uint i = 0; i < nsize; ++i) {
const int tid = omp_get_thread_num();
auto ridx = static_cast<size_t>(batch.base_rowid + i);
RegTree::FVec& feats = thread_temp[tid];
DenseFeatureVector& feats = thread_temp[tid];
feats.Fill(batch[i]);
for (unsigned j = 0; j < ntree_limit; ++j) {
int tid = model.trees[j]->GetLeafIndex(feats, info.GetRoot(ridx));
Expand Down Expand Up @@ -270,9 +270,9 @@ class CPUPredictor : public Predictor {
// allocated one
std::fill(contribs.begin(), contribs.end(), 0);
// initialize tree node mean values
#pragma omp parallel for schedule(static)
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
model.trees[i]->FillNodeMeanValues();
model.trees[i]->FillNodeMeanValue();
}
const std::vector<bst_float>& base_margin = info.base_margin_.HostVector();
// start collecting the contributions
Expand All @@ -283,7 +283,7 @@ class CPUPredictor : public Predictor {
for (bst_omp_uint i = 0; i < nsize; ++i) {
auto row_idx = static_cast<size_t>(batch.base_rowid + i);
unsigned root_id = info.GetRoot(row_idx);
RegTree::FVec& feats = thread_temp[omp_get_thread_num()];
DenseFeatureVector& feats = thread_temp[omp_get_thread_num()];
// loop over all classes
for (int gid = 0; gid < ngroup; ++gid) {
bst_float* p_contribs =
Expand Down Expand Up @@ -356,7 +356,7 @@ class CPUPredictor : public Predictor {
}
}
}
std::vector<RegTree::FVec> thread_temp;
std::vector<DenseFeatureVector> thread_temp;
};

XGBOOST_REGISTER_PREDICTOR(CPUPredictor, "cpu_predictor")
Expand Down
2 changes: 1 addition & 1 deletion src/predictor/gpu_predictor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ struct DevicePredictionNode {
int right_child_idx;
NodeValue val;

DevicePredictionNode(const RegTree::Node& n) { // NOLINT
DevicePredictionNode(const RegressionTree::Node& n) { // NOLINT
static_assert(sizeof(DevicePredictionNode) == 16, "Size is not 16 bytes");
this->left_child_idx = n.LeftChild();
this->right_child_idx = n.RightChild();
Expand Down
Loading