Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Monitor in quantile hist. #4273

Merged
merged 1 commit into from
Mar 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 16 additions & 28 deletions src/tree/updater_quantile_hist.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,15 @@ void QuantileHistMaker::Builder::SyncHistograms(
int starting_index,
int sync_count,
RegTree *p_tree) {
perf_monitor.TickStart();
builder_monitor_.Start("SyncHistograms");
this->histred_.Allreduce(hist_[starting_index].data(), hist_builder_.GetNumBins() * sync_count);
// use Subtraction Trick
for (auto const& node_pair : nodes_for_subtraction_trick_) {
hist_.AddHistRow(node_pair.first);
SubtractionTrick(hist_[node_pair.first], hist_[node_pair.second],
hist_[(*p_tree)[node_pair.first].Parent()]);
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
builder_monitor_.Stop("SyncHistograms");
}

void QuantileHistMaker::Builder::BuildLocalHistograms(
Expand All @@ -110,7 +110,7 @@ void QuantileHistMaker::Builder::BuildLocalHistograms(
const GHistIndexBlockMatrix &gmatb,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h) {
perf_monitor.TickStart();
builder_monitor_.Start("BuildLocalHistograms");
for (auto const& entry : qexpand_depth_wise_) {
int nid = entry.nid;
RegTree::Node &node = (*p_tree)[nid];
Expand Down Expand Up @@ -150,15 +150,15 @@ void QuantileHistMaker::Builder::BuildLocalHistograms(
}
}
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);
builder_monitor_.Stop("BuildLocalHistograms");
}

void QuantileHistMaker::Builder::BuildNodeStats(
const GHistIndexMatrix &gmat,
DMatrix *p_fmat,
RegTree *p_tree,
const std::vector<GradientPair> &gpair_h) {
perf_monitor.TickStart();
builder_monitor_.Start("BuildNodeStats");
for (auto const& entry : qexpand_depth_wise_) {
int nid = entry.nid;
this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree);
Expand All @@ -172,7 +172,7 @@ void QuantileHistMaker::Builder::BuildNodeStats(
snode_[left_sibling_id].weight, snode_[nid].weight);
}
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE);
builder_monitor_.Stop("BuildNodeStats");
}

void QuantileHistMaker::Builder::EvaluateSplits(
Expand All @@ -186,17 +186,13 @@ void QuantileHistMaker::Builder::EvaluateSplits(
std::vector<ExpandEntry> *temp_qexpand_depth) {
for (auto const& entry : qexpand_depth_wise_) {
int nid = entry.nid;
perf_monitor.TickStart();
this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);
if (snode_[nid].best.loss_chg < kRtEps ||
(param_.max_depth > 0 && depth == param_.max_depth) ||
(param_.max_leaves > 0 && (*num_leaves) == param_.max_leaves)) {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else {
perf_monitor.TickStart();
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT);
int left_id = (*p_tree)[nid].LeftChild();
int right_id = (*p_tree)[nid].RightChild();
temp_qexpand_depth->push_back(ExpandEntry(left_id,
Expand Down Expand Up @@ -255,18 +251,12 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide(
int num_leaves = 0;

for (int nid = 0; nid < p_tree->param.num_roots; ++nid) {
perf_monitor.TickStart();
hist_.AddHistRow(nid);
BuildHist(gpair_h, row_set_collection_[nid], gmat, gmatb, hist_[nid], true);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);

perf_monitor.TickStart();
this->InitNewNode(nid, gmat, gpair_h, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE);

perf_monitor.TickStart();
this->EvaluateSplit(nid, gmat, hist_, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);
qexpand_loss_guided_->push(ExpandEntry(nid, p_tree->GetDepth(nid),
snode_[nid].best.loss_chg,
timestamp++));
Expand All @@ -282,16 +272,13 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide(
|| (param_.max_leaves > 0 && num_leaves == param_.max_leaves) ) {
(*p_tree)[nid].SetLeaf(snode_[nid].weight * param_.learning_rate);
} else {
perf_monitor.TickStart();
this->ApplySplit(nid, gmat, column_matrix, hist_, *p_fmat, p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::APPLY_SPLIT);

const int cleft = (*p_tree)[nid].LeftChild();
const int cright = (*p_tree)[nid].RightChild();
hist_.AddHistRow(cleft);
hist_.AddHistRow(cright);

perf_monitor.TickStart();
if (rabit::IsDistributed()) {
// in distributed mode, we need to keep consistent across workers
BuildHist(gpair_h, row_set_collection_[cleft], gmat, gmatb, hist_[cleft], true);
Expand All @@ -305,20 +292,15 @@ void QuantileHistMaker::Builder::ExpandWithLossGuide(
SubtractionTrick(hist_[cleft], hist_[cright], hist_[nid]);
}
}
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::BUILD_HIST);

perf_monitor.TickStart();
this->InitNewNode(cleft, gmat, gpair_h, *p_fmat, *p_tree);
this->InitNewNode(cright, gmat, gpair_h, *p_fmat, *p_tree);
bst_uint featureid = snode_[nid].best.SplitIndex();
spliteval_->AddSplit(nid, cleft, cright, featureid,
snode_[cleft].weight, snode_[cright].weight);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_NEW_NODE);

perf_monitor.TickStart();
this->EvaluateSplit(cleft, gmat, hist_, *p_fmat, *p_tree);
this->EvaluateSplit(cright, gmat, hist_, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::EVALUATE_SPLIT);

qexpand_loss_guided_->push(ExpandEntry(cleft, p_tree->GetDepth(cleft),
snode_[cleft].best.loss_chg,
Expand All @@ -338,15 +320,13 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat,
HostDeviceVector<GradientPair>* gpair,
DMatrix* p_fmat,
RegTree* p_tree) {
perf_monitor.StartPerfMonitor();
builder_monitor_.Start("Update");

const std::vector<GradientPair>& gpair_h = gpair->ConstHostVector();

spliteval_->Reset();

perf_monitor.TickStart();
this->InitData(gmat, gpair_h, *p_fmat, *p_tree);
perf_monitor.UpdatePerfTimer(TreeGrowingPerfMonitor::timer_name::INIT_DATA);

if (param_.grow_policy == TrainParam::kLossGuide) {
ExpandWithLossGuide(gmat, gmatb, column_matrix, p_fmat, p_tree, gpair_h);
Expand All @@ -362,7 +342,7 @@ void QuantileHistMaker::Builder::Update(const GHistIndexMatrix& gmat,

pruner_->Update(gpair, p_fmat, std::vector<RegTree*>{p_tree});

perf_monitor.EndPerfMonitor();
builder_monitor_.Stop("Update");
}

bool QuantileHistMaker::Builder::UpdatePredictionCache(
Expand Down Expand Up @@ -419,6 +399,7 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
CHECK(param_.max_depth > 0) << "max_depth cannot be 0 (unlimited) "
<< "when grow_policy is depthwise.";
}
builder_monitor_.Start("InitData");
const auto& info = fmat.Info();

{
Expand Down Expand Up @@ -519,13 +500,15 @@ void QuantileHistMaker::Builder::InitData(const GHistIndexMatrix& gmat,
qexpand_depth_wise_.clear();
}
}
builder_monitor_.Stop("InitData");
}

void QuantileHistMaker::Builder::EvaluateSplit(const int nid,
const GHistIndexMatrix& gmat,
const HistCollection& hist,
const DMatrix& fmat,
const RegTree& tree) {
builder_monitor_.Start("EvaluateSplit");
// start enumeration
const MetaInfo& info = fmat.Info();
auto p_feature_set = column_sampler_.GetFeatureSet(tree.GetDepth(nid));
Expand All @@ -550,6 +533,7 @@ void QuantileHistMaker::Builder::EvaluateSplit(const int nid,
for (unsigned tid = 0; tid < nthread; ++tid) {
snode_[nid].best.Update(best_split_tloc_[tid]);
}
builder_monitor_.Stop("EvaluateSplit");
}

void QuantileHistMaker::Builder::ApplySplit(int nid,
Expand All @@ -558,6 +542,7 @@ void QuantileHistMaker::Builder::ApplySplit(int nid,
const HistCollection& hist,
const DMatrix& fmat,
RegTree* p_tree) {
builder_monitor_.Start("ApplySplit");
// TODO(hcho3): support feature sampling by levels

/* 1. Create child nodes */
Expand Down Expand Up @@ -606,6 +591,7 @@ void QuantileHistMaker::Builder::ApplySplit(int nid,

row_set_collection_.AddSplit(
nid, row_split_tloc_, (*p_tree)[nid].LeftChild(), (*p_tree)[nid].RightChild());
builder_monitor_.Stop("ApplySplit");
}

void QuantileHistMaker::Builder::ApplySplitDenseData(
Expand Down Expand Up @@ -744,6 +730,7 @@ void QuantileHistMaker::Builder::InitNewNode(int nid,
const std::vector<GradientPair>& gpair,
const DMatrix& fmat,
const RegTree& tree) {
builder_monitor_.Start("InitNewNode");
{
snode_.resize(tree.param.num_nodes, NodeEntry(param_));
}
Expand Down Expand Up @@ -786,6 +773,7 @@ void QuantileHistMaker::Builder::InitNewNode(int nid,
snode_[nid].root_gain = static_cast<float>(
spliteval_->ComputeScore(parentid, snode_[nid].stats, snode_[nid].weight));
}
builder_monitor_.Stop("InitNewNode");
}

// enumerate the split values of specific feature
Expand Down
89 changes: 9 additions & 80 deletions src/tree/updater_quantile_hist.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "./param.h"
#include "./split_evaluator.h"
#include "../common/random.h"
#include "../common/timer.h"
#include "../common/hist_util.h"
#include "../common/row_set.h"
#include "../common/column_matrix.h"
Expand Down Expand Up @@ -87,7 +88,9 @@ class QuantileHistMaker: public TreeUpdater {
std::unique_ptr<SplitEvaluator> spliteval)
: param_(param), pruner_(std::move(pruner)),
spliteval_(std::move(spliteval)), p_last_tree_(nullptr),
p_last_fmat_(nullptr) {}
p_last_fmat_(nullptr) {
builder_monitor_.Init("Quantile::Builder");
}
// update one tree, growing
virtual void Update(const GHistIndexMatrix& gmat,
const GHistIndexBlockMatrix& gmatb,
Expand All @@ -102,6 +105,7 @@ class QuantileHistMaker: public TreeUpdater {
const GHistIndexBlockMatrix& gmatb,
GHistRow hist,
bool sync_hist) {
builder_monitor_.Start("BuildHist");
if (param_.enable_feature_grouping > 0) {
hist_builder_.BuildBlockHist(gpair, row_indices, gmatb, hist);
} else {
Expand All @@ -110,10 +114,13 @@ class QuantileHistMaker: public TreeUpdater {
if (sync_hist) {
this->histred_.Allreduce(hist.data(), hist_builder_.GetNumBins());
}
builder_monitor_.Stop("BuildHist");
}

// Thin wrapper around hist_builder_.SubtractionTrick that also records the
// call in builder_monitor_ under the label "SubtractionTrick".
// NOTE(review): presumably fills `self` from the `parent` and `sibling`
// histograms; confirm against the hist_builder_ implementation.
inline void SubtractionTrick(GHistRow self, GHistRow sibling, GHistRow parent) {
builder_monitor_.Start("SubtractionTrick");
hist_builder_.SubtractionTrick(self, sibling, parent);
// Stop uses the same label that was passed to Start above.
builder_monitor_.Stop("SubtractionTrick");
}

bool UpdatePredictionCache(const DMatrix* data,
Expand All @@ -130,84 +137,6 @@ class QuantileHistMaker: public TreeUpdater {
: nid(nid), depth(depth), loss_chg(loss_chg), timestamp(tstmp) {}
};

// Accumulates elapsed time (as reported by dmlc::GetTime) for each phase of
// tree growing and logs a per-phase summary when monitoring ends.
//
// Usage: StartPerfMonitor() once, then TickStart()/UpdatePerfTimer(phase)
// pairs around each timed region, then EndPerfMonitor() to log and reset the
// per-phase counters.
struct TreeGrowingPerfMonitor {
  enum timer_name {INIT_DATA, INIT_NEW_NODE, BUILD_HIST, EVALUATE_SPLIT, APPLY_SPLIT};

  // Time at which StartPerfMonitor() was last called. Initialised to -1 so
  // that the CHECK in EndPerfMonitor() fails deterministically when no start
  // was recorded, instead of reading an indeterminate value.
  double global_start = -1;

  // performance counters
  // Start of the currently open timed region; -1 means "no region open"
  // (the same sentinel UpdatePerfTimer() writes after closing a region).
  double tstart = -1;
  double time_init_data = 0;
  double time_init_new_node = 0;
  double time_build_hist = 0;
  double time_evaluate_split = 0;
  double time_apply_split = 0;

  // Records the starting point of the overall measurement.
  inline void StartPerfMonitor() {
    global_start = dmlc::GetTime();
  }

  // Logs every per-phase counter together with its share of the total time
  // elapsed since StartPerfMonitor(), then zeroes the per-phase counters.
  inline void EndPerfMonitor() {
    CHECK_GT(global_start, 0);
    double total_time = dmlc::GetTime() - global_start;
    LOG(INFO) << "\nInitData: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_init_data
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_init_data / total_time * 100 << "%)\n"
              << "InitNewNode: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_init_new_node
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_init_new_node / total_time * 100 << "%)\n"
              << "BuildHist: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_build_hist
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_build_hist / total_time * 100 << "%)\n"
              << "EvaluateSplit: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_evaluate_split
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_evaluate_split / total_time * 100 << "%)\n"
              << "ApplySplit: "
              << std::fixed << std::setw(6) << std::setprecision(4) << time_apply_split
              << " (" << std::fixed << std::setw(5) << std::setprecision(2)
              << time_apply_split / total_time * 100 << "%)\n"
              << "========================================\n"
              << "Total: "
              << std::fixed << std::setw(6) << std::setprecision(4) << total_time;
    // clear performance counters
    time_init_data = 0;
    time_init_new_node = 0;
    time_build_hist = 0;
    time_evaluate_split = 0;
    time_apply_split = 0;
  }

  // Opens a timed region.
  inline void TickStart() {
    tstart = dmlc::GetTime();
  }

  // Closes the region opened by TickStart() and adds its duration to the
  // counter selected by `timer_name`. Must be preceded by TickStart().
  inline void UpdatePerfTimer(const timer_name &timer_name) {
    CHECK_GT(tstart, 0);
    // Sample the clock once so every case charges the same interval.
    const double elapsed = dmlc::GetTime() - tstart;
    switch (timer_name) {
      case INIT_DATA:
        time_init_data += elapsed;
        break;
      case INIT_NEW_NODE:
        time_init_new_node += elapsed;
        break;
      case BUILD_HIST:
        time_build_hist += elapsed;
        break;
      case EVALUATE_SPLIT:
        time_evaluate_split += elapsed;
        break;
      case APPLY_SPLIT:
        time_apply_split += elapsed;
        break;
    }
    // Mark the region as closed so an unmatched UpdatePerfTimer() trips the CHECK.
    tstart = -1;
  }
};

// initialize temp data structure
void InitData(const GHistIndexMatrix& gmat,
const std::vector<GradientPair>& gpair,
Expand Down Expand Up @@ -347,7 +276,7 @@ class QuantileHistMaker: public TreeUpdater {
enum DataLayout { kDenseDataZeroBased, kDenseDataOneBased, kSparseData };
DataLayout data_layout_;

TreeGrowingPerfMonitor perf_monitor;
common::Monitor builder_monitor_;
rabit::Reducer<GradStats, GradStats::Reduce> histred_;
};

Expand Down