Skip to content

Commit

Permalink
Fix Histogram allocation. (#4347)
Browse files Browse the repository at this point in the history
* Fix Histogram allocation.

nidx_map is cleared after `Reset`, but histogram data size isn't changed hence
histogram recycling is used in later iterations.  After a reset(building new
tree), newly allocated node will start from 0, while recycling always choose
the node with smallest index, which happens to be our newly allocated node 0.
  • Loading branch information
trivialfis authored Apr 10, 2019
1 parent 81c1cd4 commit 5c25755
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 18 deletions.
52 changes: 34 additions & 18 deletions src/tree/updater_gpu_hist.cu
Original file line number Diff line number Diff line change
Expand Up @@ -365,18 +365,24 @@ __global__ void EvaluateSplitKernel(
*
* \summary Data storage for node histograms on device. Automatically expands.
*
* \tparam GradientSumT histogram entry type.
* \tparam kStopGrowingSize Do not grow beyond this size
*
* \author Rory
* \date 28/07/2018
*/
template <typename GradientSumT>
template <typename GradientSumT, size_t kStopGrowingSize = 1 << 26>
class DeviceHistogram {
private:
/*! \brief Map nidx to starting index of its histogram. */
std::map<int, size_t> nidx_map_;
thrust::device_vector<typename GradientSumT::ValueT> data_;
static constexpr size_t kStopGrowingSize = 1 << 26; // Do not grow beyond this size
int n_bins_;
int device_id_;
static constexpr size_t kNumItemsInGradientSum =
sizeof(GradientSumT) / sizeof(typename GradientSumT::ValueT);
static_assert(kNumItemsInGradientSum == 2,
"Number of items in gradient type should be 2.");

public:
void Init(int device_id, int n_bins) {
Expand All @@ -390,34 +396,44 @@ class DeviceHistogram {
data_.size() * sizeof(typename decltype(data_)::value_type)));
nidx_map_.clear();
}
bool HistogramExists(int nidx) {
return nidx_map_.find(nidx) != nidx_map_.end();
bool HistogramExists(int nidx) const {
return nidx_map_.find(nidx) != nidx_map_.cend();
}
size_t HistogramSize() const {
return n_bins_ * kNumItemsInGradientSum;
}

thrust::device_vector<typename GradientSumT::ValueT> &Data() {
thrust::device_vector<typename GradientSumT::ValueT>& Data() {
return data_;
}

void AllocateHistogram(int nidx) {
if (HistogramExists(nidx)) return;
size_t current_size = nidx_map_.size() * n_bins_ *
2; // Number of items currently used in data
// Number of items currently used in data
const size_t used_size = nidx_map_.size() * HistogramSize();
const size_t new_used_size = used_size + HistogramSize();
dh::safe_cuda(cudaSetDevice(device_id_));
if (data_.size() >= kStopGrowingSize) {
// Recycle histogram memory
std::pair<int, size_t> old_entry = *nidx_map_.begin();
nidx_map_.erase(old_entry.first);
dh::safe_cuda(cudaMemsetAsync(data_.data().get() + old_entry.second, 0,
n_bins_ * sizeof(GradientSumT)));
nidx_map_[nidx] = old_entry.second;
if (new_used_size <= data_.size()) {
// no need to remove old node, just insert the new one.
nidx_map_[nidx] = used_size;
// memset histogram size in bytes
dh::safe_cuda(cudaMemsetAsync(data_.data().get() + used_size, 0,
n_bins_ * sizeof(GradientSumT)));
} else {
std::pair<int, size_t> old_entry = *nidx_map_.begin();
nidx_map_.erase(old_entry.first);
dh::safe_cuda(cudaMemsetAsync(data_.data().get() + old_entry.second, 0,
n_bins_ * sizeof(GradientSumT)));
nidx_map_[nidx] = old_entry.second;
}
} else {
// Append new node histogram
nidx_map_[nidx] = current_size;
if (data_.size() < current_size + n_bins_ * 2) {
size_t new_size = current_size * 2; // Double in size
new_size = std::max(static_cast<size_t>(n_bins_ * 2),
new_size); // Have at least one histogram
data_.resize(new_size);
nidx_map_[nidx] = used_size;
size_t new_required_memory = std::max(data_.size() * 2, HistogramSize());
if (data_.size() < new_required_memory) {
data_.resize(new_required_memory);
}
}
}
Expand Down
36 changes: 36 additions & 0 deletions tests/cpp/tree/test_gpu_hist.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,42 @@
namespace xgboost {
namespace tree {

TEST(GpuHist, DeviceHistogram) {
// Ensures that node allocates correctly after reaching `kStopGrowingSize`.
dh::SaveCudaContext{
[&]() {
dh::safe_cuda(cudaSetDevice(0));
constexpr size_t kNBins = 128;
constexpr size_t kNNodes = 4;
constexpr size_t kStopGrowing = kNNodes * kNBins * 2u;
DeviceHistogram<GradientPairPrecise, kStopGrowing> histogram;
histogram.Init(0, kNBins);
for (size_t i = 0; i < kNNodes; ++i) {
histogram.AllocateHistogram(i);
}
histogram.Reset();
ASSERT_EQ(histogram.Data().size(), kStopGrowing);

// Use allocated memory but do not erase nidx_map.
for (size_t i = 0; i < kNNodes; ++i) {
histogram.AllocateHistogram(i);
}
for (size_t i = 0; i < kNNodes; ++i) {
ASSERT_TRUE(histogram.HistogramExists(i));
}

// Erase existing nidx_map.
for (size_t i = kNNodes; i < kNNodes * 2; ++i) {
histogram.AllocateHistogram(i);
}
for (size_t i = 0; i < kNNodes; ++i) {
ASSERT_FALSE(histogram.HistogramExists(i));
}
}
};

}

template <typename GradientSumT>
void BuildGidx(DeviceShard<GradientSumT>* shard, int n_rows, int n_cols,
bst_float sparsity=0) {
Expand Down

0 comments on commit 5c25755

Please sign in to comment.