Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing hist_util #5251

Merged
merged 9 commits into from
Feb 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 17 additions & 18 deletions src/common/hist_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ void SparseCuts::SingleThreadBuild(SparsePage const& page, MetaInfo const& info,
bool const use_group_ind,
uint32_t beg_col, uint32_t end_col,
uint32_t thread_id) {
using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
CHECK_GE(end_col, beg_col);
constexpr float kFactor = 8;

Expand All @@ -80,7 +79,7 @@ void SparseCuts::SingleThreadBuild(SparsePage const& page, MetaInfo const& info,

for (uint32_t col_id = beg_col; col_id < page.Size() && col_id < end_col; ++col_id) {
// Using a local variable makes things easier, but at the cost of memory thrashing.
WXQSketch sketch;
WQSketch sketch;
common::Span<xgboost::Entry const> const column = page[col_id];
uint32_t const n_bins = std::min(static_cast<uint32_t>(column.size()),
max_num_bins);
Expand All @@ -104,18 +103,18 @@ void SparseCuts::SingleThreadBuild(SparsePage const& page, MetaInfo const& info,
sketch.Push(entry.fvalue, info.GetWeight(weight_ind));
}

WXQSketch::SummaryContainer out_summary;
WQSketch::SummaryContainer out_summary;
sketch.GetSummary(&out_summary);
WXQSketch::SummaryContainer summary;
summary.Reserve(n_bins);
summary.SetPrune(out_summary, n_bins);
WQSketch::SummaryContainer summary;
summary.Reserve(n_bins + 1);
summary.SetPrune(out_summary, n_bins + 1);

// Can we use data[1] as the min value so that we don't need to
// store another array?
float mval = summary.data[0].value;
p_cuts_->min_vals_[col_id - beg_col] = mval - (fabs(mval) + 1e-5);

this->AddCutPoint(summary);
this->AddCutPoint(summary, max_num_bins);

bst_float cpt = (summary.size > 0) ?
summary.data[summary.size - 1].value :
Expand Down Expand Up @@ -234,7 +233,7 @@ void DenseCuts::Build(DMatrix* p_fmat, uint32_t max_num_bins) {

// safe factor for better accuracy
constexpr int kFactor = 8;
std::vector<WXQSketch> sketchs;
std::vector<WQSketch> sketchs;

const int nthread = omp_get_max_threads();

Expand Down Expand Up @@ -292,34 +291,34 @@ void DenseCuts::Build(DMatrix* p_fmat, uint32_t max_num_bins) {
}

void DenseCuts::Init
(std::vector<WXQSketch>* in_sketchs, uint32_t max_num_bins) {
(std::vector<WQSketch>* in_sketchs, uint32_t max_num_bins) {
monitor_.Start(__func__);
std::vector<WXQSketch>& sketchs = *in_sketchs;
std::vector<WQSketch>& sketchs = *in_sketchs;
constexpr int kFactor = 8;
// gather the histogram data
rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
std::vector<WXQSketch::SummaryContainer> summary_array;
rabit::SerializeReducer<WQSketch::SummaryContainer> sreducer;
std::vector<WQSketch::SummaryContainer> summary_array;
summary_array.resize(sketchs.size());
for (size_t i = 0; i < sketchs.size(); ++i) {
WXQSketch::SummaryContainer out;
WQSketch::SummaryContainer out;
sketchs[i].GetSummary(&out);
summary_array[i].Reserve(max_num_bins * kFactor);
summary_array[i].SetPrune(out, max_num_bins * kFactor);
}
CHECK_EQ(summary_array.size(), in_sketchs->size());
size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_num_bins * kFactor);
size_t nbytes = WQSketch::SummaryContainer::CalcMemCost(max_num_bins * kFactor);
// TODO(chenqin): rabit failure recovery assumes no bootstrap one-time call after loadcheckpoint
// we need to move this allreduce before loadcheckpoint call in future
sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());
p_cuts_->min_vals_.resize(sketchs.size());

for (size_t fid = 0; fid < summary_array.size(); ++fid) {
WXQSketch::SummaryContainer a;
a.Reserve(max_num_bins);
a.SetPrune(summary_array[fid], max_num_bins);
WQSketch::SummaryContainer a;
a.Reserve(max_num_bins + 1);
a.SetPrune(summary_array[fid], max_num_bins + 1);
const bst_float mval = a.data[0].value;
p_cuts_->min_vals_[fid] = mval - (fabs(mval) + 1e-5);
AddCutPoint(a);
AddCutPoint(a, max_num_bins);
// push a value that is greater than anything
const bst_float cpt
= (a.size > 0) ? a.data[a.size - 1].value : p_cuts_->min_vals_[fid];
Expand Down
20 changes: 10 additions & 10 deletions src/common/hist_util.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
namespace xgboost {
namespace common {

using WXQSketch = DenseCuts::WXQSketch;
using WQSketch = DenseCuts::WQSketch;

__global__ void FindCutsK(WXQSketch::Entry* __restrict__ cuts,
__global__ void FindCutsK(WQSketch::Entry* __restrict__ cuts,
const bst_float* __restrict__ data,
const float* __restrict__ cum_weights,
int nsamples,
Expand All @@ -52,7 +52,7 @@ __global__ void FindCutsK(WXQSketch::Entry* __restrict__ cuts,
// repeated values will be filtered out on the CPU
bst_float rmin = isample > 0 ? cum_weights[isample - 1] : 0;
bst_float rmax = cum_weights[isample];
cuts[icut] = WXQSketch::Entry(rmin, rmax, rmax - rmin, data[isample]);
cuts[icut] = WQSketch::Entry(rmin, rmax, rmax - rmin, data[isample]);
}

// predicate for thrust filtering that returns true if the element is not a NaN
Expand Down Expand Up @@ -97,7 +97,7 @@ __global__ void UnpackFeaturesK(float* __restrict__ fvalues,
* across distinct rows.
*/
struct SketchContainer {
std::vector<DenseCuts::WXQSketch> sketches_; // NOLINT
std::vector<DenseCuts::WQSketch> sketches_; // NOLINT
std::vector<std::mutex> col_locks_; // NOLINT
static constexpr int kOmpNumColsParallelizeLimit = 1000;

Expand Down Expand Up @@ -245,11 +245,11 @@ class GPUSketcher {
if (n_cuts_ > n_unique) {
float* weights2_ptr = weights2_.data().get();
float* fvalues_ptr = fvalues_cur_.data().get();
WXQSketch::Entry* cuts_ptr = cuts_d_.data().get() + icol * n_cuts_;
WQSketch::Entry* cuts_ptr = cuts_d_.data().get() + icol * n_cuts_;
dh::LaunchN(device_, n_unique, [=]__device__(size_t i) {
bst_float rmax = weights2_ptr[i];
bst_float rmin = i > 0 ? weights2_ptr[i - 1] : 0;
cuts_ptr[i] = WXQSketch::Entry(rmin, rmax, rmax - rmin, fvalues_ptr[i]);
cuts_ptr[i] = WQSketch::Entry(rmin, rmax, rmax - rmin, fvalues_ptr[i]);
});
} else if (n_cuts_cur_[icol] > 0) {
// if more elements than cuts: use binary search on cumulative weights
Expand Down Expand Up @@ -287,7 +287,7 @@ class GPUSketcher {
constexpr int kFactor = 8;
double eps = 1.0 / (kFactor * max_bin_);
size_t dummy_nlevel;
WXQSketch::LimitSizeLevel(gpu_batch_nrows_, eps, &dummy_nlevel, &n_cuts_);
WQSketch::LimitSizeLevel(gpu_batch_nrows_, eps, &dummy_nlevel, &n_cuts_);

// allocate necessary GPU buffers
dh::safe_cuda(cudaSetDevice(device_));
Expand Down Expand Up @@ -425,7 +425,7 @@ class GPUSketcher {
#pragma omp parallel for default(none) schedule(static) \
if (num_cols_ > SketchContainer::kOmpNumColsParallelizeLimit) // NOLINT
for (int icol = 0; icol < num_cols_; ++icol) {
WXQSketch::SummaryContainer summary;
WQSketch::SummaryContainer summary;
summary.Reserve(n_cuts_);
summary.MakeFromSorted(&cuts_h_[n_cuts_ * icol], n_cuts_cur_[icol]);

Expand All @@ -450,8 +450,8 @@ if (num_cols_ > SketchContainer::kOmpNumColsParallelizeLimit) // NOLINT
dh::device_vector<bst_float> fvalues_{};
dh::device_vector<bst_float> feature_weights_{};
dh::device_vector<bst_float> fvalues_cur_{};
dh::device_vector<WXQSketch::Entry> cuts_d_{};
thrust::host_vector<WXQSketch::Entry> cuts_h_{};
dh::device_vector<WQSketch::Entry> cuts_d_{};
thrust::host_vector<WQSketch::Entry> cuts_h_{};
dh::device_vector<bst_float> weights_{};
dh::device_vector<bst_float> weights2_{};
std::vector<size_t> n_cuts_cur_{};
Expand Down
30 changes: 12 additions & 18 deletions src/common/hist_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ struct SimpleArray {
using GHistIndexRow = Span<uint32_t const>;

// A CSC matrix representing histogram cuts, used in CPU quantile hist.
// The cut values represent upper bounds of bins containing approximately equal numbers of elements
class HistogramCuts {
// Using friends to avoid creating a virtual class, since HistogramCuts is used as value
// object in many places.
Expand Down Expand Up @@ -147,7 +148,9 @@ class HistogramCuts {

size_t TotalBins() const { return cut_ptrs_.back(); }

BinIdx SearchBin(float value, uint32_t column_id) {
// Return the index of a cut point that is strictly greater than the input
// value, or the last available index if none exists
BinIdx SearchBin(float value, uint32_t column_id) const {
auto beg = cut_ptrs_.at(column_id);
auto end = cut_ptrs_.at(column_id + 1);
auto it = std::upper_bound(cut_values_.cbegin() + beg, cut_values_.cbegin() + end, value);
Expand All @@ -171,7 +174,7 @@ class HistogramCuts {
*/
class CutsBuilder {
public:
using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
using WQSketch = common::WQuantileSketch<bst_float, bst_float>;

protected:
HistogramCuts* p_cuts_;
Expand All @@ -195,21 +198,12 @@ class CutsBuilder {
return group_ind;
}

void AddCutPoint(WXQSketch::SummaryContainer const& summary) {
if (summary.size > 1 && summary.size <= 16) {
/* specialized code categorial / ordinal data -- use midpoints */
for (size_t i = 1; i < summary.size; ++i) {
bst_float cpt = (summary.data[i].value + summary.data[i - 1].value) / 2.0f;
if (i == 1 || cpt > p_cuts_->cut_values_.back()) {
p_cuts_->cut_values_.push_back(cpt);
}
}
} else {
for (size_t i = 2; i < summary.size; ++i) {
bst_float cpt = summary.data[i - 1].value;
if (i == 2 || cpt > p_cuts_->cut_values_.back()) {
p_cuts_->cut_values_.push_back(cpt);
}
/*!
 * \brief Append cut values taken from a quantile summary to the cut container.
 *
 * Entry 0 of the summary is skipped; entries 1 .. required_cuts - 1 are
 * considered, where required_cuts = min(summary.size, max_bin).  An entry is
 * appended only if it is the first one considered or if its value is strictly
 * greater than the last cut value already stored, so the values appended by
 * one call are strictly increasing.
 *
 * \param summary Summary whose `data[i].value` entries supply the candidates.
 * \param max_bin Upper bound on the number of summary entries examined.
 *                Non-positive values result in no cut being appended.
 */
void AddCutPoint(WQSketch::SummaryContainer const& summary, int max_bin) {
  // Compute the bound as an unsigned quantity.  Comparing the size_t loop
  // index against a negative int would convert the bound to a huge unsigned
  // value and walk past the end of summary.data, so clamp max_bin at zero.
  size_t const bin_limit = static_cast<size_t>(std::max(max_bin, 0));
  size_t const required_cuts =
      std::min(static_cast<size_t>(summary.size), bin_limit);
  for (size_t i = 1; i < required_cuts; ++i) {
    bst_float const cpt = summary.data[i].value;
    // Drop candidates that do not increase over the previously stored cut.
    if (i == 1 || cpt > p_cuts_->cut_values_.back()) {
      p_cuts_->cut_values_.push_back(cpt);
    }
  }
}
Expand Down Expand Up @@ -250,7 +244,7 @@ class DenseCuts : public CutsBuilder {
CutsBuilder(container) {
monitor_.Init(__FUNCTION__);
}
void Init(std::vector<WXQSketch>* sketchs, uint32_t max_num_bins);
void Init(std::vector<WQSketch>* sketchs, uint32_t max_num_bins);
void Build(DMatrix* p_fmat, uint32_t max_num_bins) override;
};

Expand Down
120 changes: 112 additions & 8 deletions tests/cpp/common/test_hist_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "../../../src/common/hist_util.h"
#include "../helpers.h"
#include "test_hist_util.h"

namespace xgboost {
namespace common {
Expand Down Expand Up @@ -152,14 +153,6 @@ TEST(CutsBuilder, SearchGroupInd) {
delete pp_dmat;
}

namespace {
// Test-only subclass of SparseCuts that exposes the cut container reached
// through the inherited p_cuts_ pointer, so tests can inspect the result.
class SparseCutsWrapper : public SparseCuts {
 public:
  // Forwards to p_cuts_->Ptrs().
  const std::vector<uint32_t>& ColPtrs() const {
    return p_cuts_->Ptrs();
  }

  // Forwards to p_cuts_->Values().
  const std::vector<float>& ColValues() const {
    return p_cuts_->Values();
  }
};
}  // anonymous namespace

TEST(SparseCuts, SingleThreadedBuild) {
size_t constexpr kRows = 267;
size_t constexpr kCols = 31;
Expand Down Expand Up @@ -235,5 +228,116 @@ TEST(SparseCuts, MultiThreadedBuild) {
omp_set_num_threads(ori_nthreads);
}

// Builds dense cuts (num_bins = 256) over a single generated column for each
// combination of row count and category count, then checks that:
//   * the recorded minimum value lies strictly below the smallest input,
//   * the first cut lies strictly above the smallest input,
//   * the last cut is at least the largest input,
//   * the number of cuts equals the number of categories.
// NOTE(review): GenerateRandomCategoricalSingleColumn presumably yields n
// values drawn from num_categories distinct categories - confirm in the helper.
TEST(hist_util, DenseCutsCategorical) {
  int categorical_sizes[] = {2, 6, 8, 12};
  int num_bins = 256;
  int sizes[] = {25, 100, 1000};
  for (auto n : sizes) {
    for (auto num_categories : categorical_sizes) {
      auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
      // Sorted copy gives direct access to the smallest and largest inputs.
      std::vector<float> x_sorted(x);
      std::sort(x_sorted.begin(), x_sorted.end());
      auto dmat = GetDMatrixFromData(x, n, 1);
      HistogramCuts cuts;
      DenseCuts dense(&cuts);
      dense.Build(&dmat, num_bins);
      // Bind by reference: the values are only read, no copy is needed.
      const auto& cuts_from_sketch = cuts.Values();
      EXPECT_LT(cuts.MinValues()[0], x_sorted.front());
      EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());
      EXPECT_GE(cuts_from_sketch.back(), x_sorted.back());
      // Compare like with like; size() is unsigned while num_categories is int.
      EXPECT_EQ(cuts_from_sketch.size(), static_cast<size_t>(num_categories));
    }
  }
}

// For every row count, generates a 5-column random matrix once and, for every
// bin count, builds dense cuts over it and hands them to ValidateCuts.
TEST(hist_util, DenseCutsAccuracyTest) {
  const int kBinCounts[] = {2, 16, 256, 512};
  const int kRowCounts[] = {100, 1000, 1500};
  const int kNumColumns = 5;
  for (auto rows : kRowCounts) {
    // Data and matrix are shared across all bin counts for this row count.
    auto data = GenerateRandom(rows, kNumColumns);
    auto matrix = GetDMatrixFromData(data, rows, kNumColumns);
    for (auto bins : kBinCounts) {
      HistogramCuts cuts;
      DenseCuts builder(&cuts);
      builder.Build(&matrix, bins);
      ValidateCuts(cuts, data, rows, kNumColumns, bins);
    }
  }
}

// Same sweep as the in-memory dense accuracy test, but the matrix is obtained
// from GetExternalMemoryDMatrixFromData, backed by a temporary directory.
TEST(hist_util, DenseCutsExternalMemory) {
  const int kBinCounts[] = {2, 16, 256, 512};
  const int kRowCounts[] = {100, 1000, 1500};
  const int kNumColumns = 5;
  for (auto rows : kRowCounts) {
    auto data = GenerateRandom(rows, kNumColumns);
    // Declared before the matrix so the directory outlives it.
    dmlc::TemporaryDirectory tmpdir;
    auto matrix = GetExternalMemoryDMatrixFromData(data, rows, kNumColumns,
                                                   50, tmpdir);
    for (auto bins : kBinCounts) {
      HistogramCuts cuts;
      DenseCuts builder(&cuts);
      builder.Build(matrix.get(), bins);
      ValidateCuts(cuts, data, rows, kNumColumns, bins);
    }
  }
}

// For every row count, generates a 5-column random matrix once and, for every
// bin count, builds sparse cuts over it and hands them to ValidateCuts.
TEST(hist_util, SparseCutsAccuracyTest) {
  const int kBinCounts[] = {2, 16, 256, 512};
  const int kRowCounts[] = {100, 1000, 1500};
  const int kNumColumns = 5;
  for (auto rows : kRowCounts) {
    // Data and matrix are shared across all bin counts for this row count.
    auto data = GenerateRandom(rows, kNumColumns);
    auto matrix = GetDMatrixFromData(data, rows, kNumColumns);
    for (auto bins : kBinCounts) {
      HistogramCuts cuts;
      SparseCuts builder(&cuts);
      builder.Build(&matrix, bins);
      ValidateCuts(cuts, data, rows, kNumColumns, bins);
    }
  }
}

// Builds sparse cuts (num_bins = 256) over a single generated column for each
// combination of row count and category count, then checks that:
//   * the recorded minimum value lies strictly below the smallest input,
//   * the first cut lies strictly above the smallest input,
//   * the last cut is at least the largest input,
//   * the number of cuts equals the number of categories.
// NOTE(review): GenerateRandomCategoricalSingleColumn presumably yields n
// values drawn from num_categories distinct categories - confirm in the helper.
TEST(hist_util, SparseCutsCategorical) {
  int categorical_sizes[] = {2, 6, 8, 12};
  int num_bins = 256;
  int sizes[] = {25, 100, 1000};
  for (auto n : sizes) {
    for (auto num_categories : categorical_sizes) {
      auto x = GenerateRandomCategoricalSingleColumn(n, num_categories);
      // Sorted copy gives direct access to the smallest and largest inputs.
      std::vector<float> x_sorted(x);
      std::sort(x_sorted.begin(), x_sorted.end());
      auto dmat = GetDMatrixFromData(x, n, 1);
      HistogramCuts cuts;
      SparseCuts sparse(&cuts);
      sparse.Build(&dmat, num_bins);
      // Bind by reference: the values are only read, no copy is needed.
      const auto& cuts_from_sketch = cuts.Values();
      EXPECT_LT(cuts.MinValues()[0], x_sorted.front());
      EXPECT_GT(cuts_from_sketch.front(), x_sorted.front());
      EXPECT_GE(cuts_from_sketch.back(), x_sorted.back());
      // Compare like with like; size() is unsigned while num_categories is int.
      EXPECT_EQ(cuts_from_sketch.size(), static_cast<size_t>(num_categories));
    }
  }
}

// Same sweep as the in-memory sparse accuracy test, but the matrix is obtained
// from GetExternalMemoryDMatrixFromData, backed by a temporary directory.
TEST(hist_util, SparseCutsExternalMemory) {
  const int kBinCounts[] = {2, 16, 256, 512};
  const int kRowCounts[] = {100, 1000, 1500};
  const int kNumColumns = 5;
  for (auto rows : kRowCounts) {
    auto data = GenerateRandom(rows, kNumColumns);
    // Declared before the matrix so the directory outlives it.
    dmlc::TemporaryDirectory tmpdir;
    auto matrix = GetExternalMemoryDMatrixFromData(data, rows, kNumColumns,
                                                   50, tmpdir);
    for (auto bins : kBinCounts) {
      HistogramCuts cuts;
      SparseCuts builder(&cuts);
      builder.Build(matrix.get(), bins);
      ValidateCuts(cuts, data, rows, kNumColumns, bins);
    }
  }
}
} // namespace common
} // namespace xgboost
Loading