Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.

- added distributions to HostDeviceVector
- using HostDeviceVector for labels, weights and base margings in MetaInfo
- using HostDeviceVector for offset and data in SparsePage
- other necessary refactoring

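A rough sketch of the new host-side access pattern (the MetaInfo instance
and values here are hypothetical; labels_, HostVector() and Size() are taken
from the diff below):

    MetaInfo info;
    // Host access goes through HostVector(), which lazily syncs the host
    // copy, instead of touching a raw std::vector member directly.
    std::vector<bst_float>& labels = info.labels_.HostVector();
    labels.assign({0.0f, 1.0f, 1.0f});
    // Size() reports the element count without forcing a host/device copy.
    size_t n = info.labels_.Size();
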
* Added const versions of HostDeviceVector API calls.

- const versions added to calls that can trigger data transfers, e.g. DevicePointer()
- updated the code that uses HostDeviceVector
- objective functions now accept const HostDeviceVector<bst_float>& for predictions (see the sketch below)

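For illustration, the shape an objective implementation takes after this
change (condensed from the custom_obj.cc diff below; the gradient math is
elided):

    void GetGradient(const HostDeviceVector<bst_float>& preds,
                     const MetaInfo& info, int iter,
                     HostDeviceVector<GradientPair>* out_gpair) override {
      out_gpair->Resize(preds.Size());
      // Read-only view of the predictions; the const overload of
      // HostVector() may still sync the host copy, but cannot modify it.
      const std::vector<bst_float>& preds_h = preds.HostVector();
      // ... fill out_gpair->HostVector() from preds_h ...
    }
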
* Updated src/linear/updater_gpu_coordinate.cu.

* Added read-only state for HostDeviceVector sync.

- this means no copies are performed if both host and devices access
  the HostDeviceVector read-only (a conceptual sketch follows)

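A conceptual model of that read-only state (not the actual implementation,
just a minimal sketch of why shared reads need no transfer):

    enum class Permission { kNone, kRead, kWrite };
    struct CopyState {
      bool valid;       // does this copy (host or device) hold current data?
      Permission perm;  // access most recently granted on this copy
    };
    // A read can be served by any valid copy, and granting kRead on one
    // side leaves the other side's copy valid -- no transfer is needed.
    // A write invalidates all other copies, so the next access from another
    // side triggers a transfer.
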
* Fixed linter and test errors.

- updated the lz4 plugin
- added ConstDeviceSpan to HostDeviceVector
- using device % dh::NVisibleDevices() for the physical device number,
  e.g. in calls to cudaSetDevice() (see the sketch below)

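The mapping itself is a single modulo; a sketch (dh::NVisibleDevices() is
named above, the dh::safe_cuda wrapper is an assumption):

    // Virtual device IDs may exceed the number of physically visible GPUs,
    // e.g. in tests; wrap them onto real devices before any CUDA call.
    int physical_device = device % dh::NVisibleDevices();
    dh::safe_cuda(cudaSetDevice(physical_device));
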
* Fixed explicit template instantiation errors for HostDeviceVector.

- replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>
  (the instantiation pattern is sketched below)

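A sketch of the explicit-instantiation pattern this fixes (the exact list of
types is an assumption; it lives in the .cu file with the definitions):

    // Every T used across translation-unit boundaries needs an explicit
    // instantiation, since the member definitions are compiled by nvcc only.
    template class HostDeviceVector<bst_float>;
    template class HostDeviceVector<GradientPair>;
    template class HostDeviceVector<int>;  // replaces <unsigned int>
    template class HostDeviceVector<Entry>;
    template class HostDeviceVector<size_t>;
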
* Fixed HostDeviceVector tests that require multiple GPUs.

- added a mock set device handler; when set, it is called instead of
  cudaSetDevice() (usage sketched below)
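A sketch of how the hook can be used in a test (assuming a setter shaped
like SetCudaSetDeviceHandler(void (*)(int))):

    namespace {
    int last_device = -1;
    // Records the requested device instead of switching GPUs, so tests
    // written for several devices can run on a single-GPU machine.
    void MockSetDevice(int device) { last_device = device; }
    }  // anonymous namespace

    // In test setup:
    SetCudaSetDeviceHandler(&MockSetDevice);
    // ... exercise HostDeviceVector across several virtual devices ...
    SetCudaSetDeviceHandler(nullptr);  // restore the real cudaSetDevice()
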
canonizer authored and RAMitchell committed Aug 30, 2018
1 parent 58d783d commit 72cd151
Showing 45 changed files with 1,141 additions and 560 deletions.
84 changes: 48 additions & 36 deletions include/xgboost/data.h
@@ -17,6 +17,8 @@
 #include "./base.h"
 #include "../../src/common/span.h"
 
+#include "../../src/common/host_device_vector.h"
+
 namespace xgboost {
 // forward declare learner.
 class LearnerImpl;
@@ -41,7 +43,7 @@ class MetaInfo {
   /*! \brief number of nonzero entries in the data */
   uint64_t num_nonzero_{0};
   /*! \brief label of each instance */
-  std::vector<bst_float> labels_;
+  HostDeviceVector<bst_float> labels_;
   /*!
    * \brief specified root index of each instance,
    * can be used for multi task setting
@@ -53,15 +55,15 @@ class MetaInfo {
    */
   std::vector<bst_uint> group_ptr_;
   /*! \brief weights of each instance, optional */
-  std::vector<bst_float> weights_;
+  HostDeviceVector<bst_float> weights_;
   /*! \brief session-id of each instance, optional */
   std::vector<uint64_t> qids_;
   /*!
    * \brief initialized margins,
    * if specified, xgboost will start from this init margin
    * can be used to specify initial prediction to boost from.
    */
-  std::vector<bst_float> base_margin_;
+  HostDeviceVector<bst_float> base_margin_;
   /*! \brief version flag, used to check version of this info */
   static const int kVersion = 2;
   /*! \brief version that introduced qid field */
@@ -74,7 +76,7 @@ class MetaInfo {
    * \return The weight.
    */
   inline bst_float GetWeight(size_t i) const {
-    return weights_.size() != 0 ? weights_[i] : 1.0f;
+    return weights_.Size() != 0 ? weights_.HostVector()[i] : 1.0f;
   }
   /*!
    * \brief Get the root index of i-th instance.
@@ -86,12 +88,12 @@ class MetaInfo {
   }
   /*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
   inline const std::vector<size_t>& LabelAbsSort() const {
-    if (label_order_cache_.size() == labels_.size()) {
+    if (label_order_cache_.size() == labels_.Size()) {
       return label_order_cache_;
     }
-    label_order_cache_.resize(labels_.size());
+    label_order_cache_.resize(labels_.Size());
     std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
-    const auto l = labels_;
+    const auto& l = labels_.HostVector();
     XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
               [&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});
 
@@ -151,9 +153,9 @@ struct Entry {
  */
 class SparsePage {
  public:
-  std::vector<size_t> offset;
+  HostDeviceVector<size_t> offset;
   /*! \brief the data of the segments */
-  std::vector<Entry> data;
+  HostDeviceVector<Entry> data;
 
   size_t base_rowid;
 
@@ -162,8 +164,10 @@ class SparsePage {
 
   /*! \brief get i-th row from the batch */
   inline Inst operator[](size_t i) const {
-    return {data.data() + offset[i],
-            static_cast<Inst::index_type>(offset[i + 1] - offset[i])};
+    const auto& data_vec = data.HostVector();
+    const auto& offset_vec = offset.HostVector();
+    return {data_vec.data() + offset_vec[i],
+            static_cast<Inst::index_type>(offset_vec[i + 1] - offset_vec[i])};
   }
 
   /*! \brief constructor */
@@ -172,73 +176,81 @@ class SparsePage {
   }
   /*! \return number of instance in the page */
   inline size_t Size() const {
-    return offset.size() - 1;
+    return offset.Size() - 1;
   }
   /*! \return estimation of memory cost of this page */
   inline size_t MemCostBytes() const {
-    return offset.size() * sizeof(size_t) + data.size() * sizeof(Entry);
+    return offset.Size() * sizeof(size_t) + data.Size() * sizeof(Entry);
   }
   /*! \brief clear the page */
   inline void Clear() {
     base_rowid = 0;
-    offset.clear();
-    offset.push_back(0);
-    data.clear();
+    auto& offset_vec = offset.HostVector();
+    offset_vec.clear();
+    offset_vec.push_back(0);
+    data.HostVector().clear();
   }
 
   /*!
    * \brief Push row block into the page.
    * \param batch the row batch.
    */
   inline void Push(const dmlc::RowBlock<uint32_t>& batch) {
-    data.reserve(data.size() + batch.offset[batch.size] - batch.offset[0]);
-    offset.reserve(offset.size() + batch.size);
+    auto& data_vec = data.HostVector();
+    auto& offset_vec = offset.HostVector();
+    data_vec.reserve(data.Size() + batch.offset[batch.size] - batch.offset[0]);
+    offset_vec.reserve(offset.Size() + batch.size);
     CHECK(batch.index != nullptr);
     for (size_t i = 0; i < batch.size; ++i) {
-      offset.push_back(offset.back() + batch.offset[i + 1] - batch.offset[i]);
+      offset_vec.push_back(offset_vec.back() + batch.offset[i + 1] - batch.offset[i]);
     }
     for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
       uint32_t index = batch.index[i];
       bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
-      data.emplace_back(index, fvalue);
+      data_vec.emplace_back(index, fvalue);
    }
-    CHECK_EQ(offset.back(), data.size());
+    CHECK_EQ(offset_vec.back(), data.Size());
   }
   /*!
    * \brief Push a sparse page
    * \param batch the row page
    */
   inline void Push(const SparsePage &batch) {
-    size_t top = offset.back();
-    data.resize(top + batch.data.size());
-    std::memcpy(dmlc::BeginPtr(data) + top,
-                dmlc::BeginPtr(batch.data),
-                sizeof(Entry) * batch.data.size());
-    size_t begin = offset.size();
-    offset.resize(begin + batch.Size());
+    auto& data_vec = data.HostVector();
+    auto& offset_vec = offset.HostVector();
+    const auto& batch_offset_vec = batch.offset.HostVector();
+    const auto& batch_data_vec = batch.data.HostVector();
+    size_t top = offset_vec.back();
+    data_vec.resize(top + batch.data.Size());
+    std::memcpy(dmlc::BeginPtr(data_vec) + top,
+                dmlc::BeginPtr(batch_data_vec),
+                sizeof(Entry) * batch.data.Size());
+    size_t begin = offset.Size();
+    offset_vec.resize(begin + batch.Size());
     for (size_t i = 0; i < batch.Size(); ++i) {
-      offset[i + begin] = top + batch.offset[i + 1];
+      offset_vec[i + begin] = top + batch_offset_vec[i + 1];
     }
   }
   /*!
    * \brief Push one instance into page
    * \param inst an instance row
    */
   inline void Push(const Inst &inst) {
-    offset.push_back(offset.back() + inst.size());
-    size_t begin = data.size();
-    data.resize(begin + inst.size());
+    auto& data_vec = data.HostVector();
+    auto& offset_vec = offset.HostVector();
+    offset_vec.push_back(offset_vec.back() + inst.size());
+
+    size_t begin = data_vec.size();
+    data_vec.resize(begin + inst.size());
     if (inst.size() != 0) {
-      std::memcpy(dmlc::BeginPtr(data) + begin, inst.data(),
+      std::memcpy(dmlc::BeginPtr(data_vec) + begin, inst.data(),
                   sizeof(Entry) * inst.size());
     }
   }
 
-  size_t Size() { return offset.size() - 1; }
+  size_t Size() { return offset.Size() - 1; }
 };
 
 
 
 /*!
  * \brief This is data structure that user can pass to DMatrix::Create
  * to create a DMatrix for training, user can create this data structure
2 changes: 1 addition & 1 deletion include/xgboost/objective.h
@@ -44,7 +44,7 @@ class ObjFunction {
    * \param iteration current iteration number.
    * \param out_gpair output of get gradient, saves gradient and second order gradient in
    */
-  virtual void GetGradient(HostDeviceVector<bst_float>* preds,
+  virtual void GetGradient(const HostDeviceVector<bst_float>& preds,
                            const MetaInfo& info,
                            int iteration,
                            HostDeviceVector<GradientPair>* out_gpair) = 0;
11 changes: 6 additions & 5 deletions plugin/example/custom_obj.cc
@@ -33,21 +33,22 @@ class MyLogistic : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
   }
-  void GetGradient(HostDeviceVector<bst_float> *preds,
+  void GetGradient(const HostDeviceVector<bst_float> &preds,
                    const MetaInfo &info,
                    int iter,
                    HostDeviceVector<GradientPair> *out_gpair) override {
-    out_gpair->Resize(preds->Size());
-    std::vector<bst_float>& preds_h = preds->HostVector();
+    out_gpair->Resize(preds.Size());
+    const std::vector<bst_float>& preds_h = preds.HostVector();
     std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
+    const std::vector<bst_float>& labels_h = info.labels_.HostVector();
     for (size_t i = 0; i < preds_h.size(); ++i) {
       bst_float w = info.GetWeight(i);
       // scale the negative examples!
-      if (info.labels_[i] == 0.0f) w *= param_.scale_neg_weight;
+      if (labels_h[i] == 0.0f) w *= param_.scale_neg_weight;
       // logistic transformation
       bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
       // this is the gradient
-      bst_float grad = (p - info.labels_[i]) * w;
+      bst_float grad = (p - labels_h[i]) * w;
       // this is the second order gradient
       bst_float hess = p * (1.0f - p) * w;
       out_gpair_h.at(i) = GradientPair(grad, hess);
51 changes: 28 additions & 23 deletions plugin/lz4/sparse_page_lz4_format.cc
@@ -177,15 +177,17 @@ class SparsePageLZ4Format : public SparsePageFormat {
   }
 
   bool Read(SparsePage* page, dmlc::SeekStream* fi) override {
-    if (!fi->Read(&(page->offset))) return false;
-    CHECK_NE(page->offset.size(), 0) << "Invalid SparsePage file";
+    auto& offset_vec = page->offset.HostVector();
+    auto& data_vec = page->data.HostVector();
+    if (!fi->Read(&(offset_vec))) return false;
+    CHECK_NE(offset_vec.size(), 0) << "Invalid SparsePage file";
     this->LoadIndexValue(fi);
 
-    page->data.resize(page->offset.back());
+    data_vec.resize(offset_vec.back());
     CHECK_EQ(index_.data.size(), value_.data.size());
-    CHECK_EQ(index_.data.size(), page->data.size());
-    for (size_t i = 0; i < page->data.size(); ++i) {
-      page->data[i] = Entry(index_.data[i] + min_index_, value_.data[i]);
+    CHECK_EQ(index_.data.size(), data_vec.size());
+    for (size_t i = 0; i < data_vec.size(); ++i) {
+      data_vec[i] = Entry(index_.data[i] + min_index_, value_.data[i]);
     }
     return true;
   }
@@ -195,47 +197,50 @@ class SparsePageLZ4Format : public SparsePageFormat {
             const std::vector<bst_uint>& sorted_index_set) override {
     if (!fi->Read(&disk_offset_)) return false;
     this->LoadIndexValue(fi);
 
-    page->offset.clear();
-    page->offset.push_back(0);
+    auto& offset_vec = page->offset.HostVector();
+    auto& data_vec = page->data.HostVector();
+    offset_vec.clear();
+    offset_vec.push_back(0);
     for (bst_uint cid : sorted_index_set) {
-      page->offset.push_back(
-          page->offset.back() + disk_offset_[cid + 1] - disk_offset_[cid]);
+      offset_vec.push_back(
+          offset_vec.back() + disk_offset_[cid + 1] - disk_offset_[cid]);
     }
-    page->data.resize(page->offset.back());
+    data_vec.resize(offset_vec.back());
     CHECK_EQ(index_.data.size(), value_.data.size());
     CHECK_EQ(index_.data.size(), disk_offset_.back());
 
     for (size_t i = 0; i < sorted_index_set.size(); ++i) {
       bst_uint cid = sorted_index_set[i];
-      size_t dst_begin = page->offset[i];
+      size_t dst_begin = offset_vec[i];
       size_t src_begin = disk_offset_[cid];
       size_t num = disk_offset_[cid + 1] - disk_offset_[cid];
       for (size_t j = 0; j < num; ++j) {
-        page->data[dst_begin + j] = Entry(
+        data_vec[dst_begin + j] = Entry(
            index_.data[src_begin + j] + min_index_, value_.data[src_begin + j]);
       }
     }
     return true;
   }
 
   void Write(const SparsePage& page, dmlc::Stream* fo) override {
-    CHECK(page.offset.size() != 0 && page.offset[0] == 0);
-    CHECK_EQ(page.offset.back(), page.data.size());
-    fo->Write(page.offset);
+    const auto& offset_vec = page.offset.HostVector();
+    const auto& data_vec = page.data.HostVector();
+    CHECK(offset_vec.size() != 0 && offset_vec[0] == 0);
+    CHECK_EQ(offset_vec.back(), data_vec.size());
+    fo->Write(offset_vec);
     min_index_ = page.base_rowid;
     fo->Write(&min_index_, sizeof(min_index_));
-    index_.data.resize(page.data.size());
-    value_.data.resize(page.data.size());
+    index_.data.resize(data_vec.size());
+    value_.data.resize(data_vec.size());
 
-    for (size_t i = 0; i < page.data.size(); ++i) {
-      bst_uint idx = page.data[i].index - min_index_;
+    for (size_t i = 0; i < data_vec.size(); ++i) {
+      bst_uint idx = data_vec[i].index - min_index_;
       CHECK_LE(idx, static_cast<bst_uint>(std::numeric_limits<StorageIndex>::max()))
          << "The storage index is chosen to limited to smaller equal than "
         << std::numeric_limits<StorageIndex>::max()
         << "min_index=" << min_index_;
       index_.data[i] = static_cast<StorageIndex>(idx);
-      value_.data[i] = page.data[i].fvalue;
+      value_.data[i] = data_vec[i].fvalue;
     }
 
    index_.InitCompressChunks(kChunkSize, kMaxChunk);
@@ -259,7 +264,7 @@ class SparsePageLZ4Format : public SparsePageFormat {
     raw_bytes_value_ += value_.RawBytes();
     encoded_bytes_index_ += index_.EncodedBytes();
     encoded_bytes_value_ += value_.EncodedBytes();
-    raw_bytes_ += page.offset.size() * sizeof(size_t);
+    raw_bytes_ += offset_vec.size() * sizeof(size_t);
   }
 
   inline void LoadIndexValue(dmlc::SeekStream* fi) {