Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage. (#3446)

* Replaced std::vector with HostDeviceVector in MetaInfo and SparsePage.

- added distributions to HostDeviceVector
- using HostDeviceVector for labels, weights and base margings in MetaInfo
- using HostDeviceVector for offset and data in SparsePage
- other necessary refactoring

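A rough sketch of the new host-side access pattern (the MetaInfo instance
and values here are hypothetical; labels_, HostVector() and Size() are taken
from the diff below):

    MetaInfo info;
    // Host access goes through HostVector(), which lazily syncs the host
    // copy, instead of touching a raw std::vector member directly.
    std::vector<bst_float>& labels = info.labels_.HostVector();
    labels.assign({0.0f, 1.0f, 1.0f});
    // Size() reports the element count without forcing a host/device copy.
    size_t n = info.labels_.Size();
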
* Added const versions of HostDeviceVector API calls.

- const versions added to calls that can trigger data transfers, e.g. DevicePointer()
- updated the code that uses HostDeviceVector
- objective functions now accept const HostDeviceVector<bst_float>& for predictions (see the sketch below)

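For illustration, the shape an objective implementation takes after this
change (condensed from the custom_obj.cc diff below; the gradient math is
elided):

    void GetGradient(const HostDeviceVector<bst_float>& preds,
                     const MetaInfo& info, int iter,
                     HostDeviceVector<GradientPair>* out_gpair) override {
      out_gpair->Resize(preds.Size());
      // Read-only view of the predictions; the const overload of
      // HostVector() may still sync the host copy, but cannot modify it.
      const std::vector<bst_float>& preds_h = preds.HostVector();
      // ... fill out_gpair->HostVector() from preds_h ...
    }
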
* Updated src/linear/updater_gpu_coordinate.cu.

* Added read-only state for HostDeviceVector sync.

- this means no copies are performed if both host and devices access
  the HostDeviceVector read-only (a conceptual sketch follows)

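A conceptual model of that read-only state (not the actual implementation,
just a minimal sketch of why shared reads need no transfer):

    enum class Permission { kNone, kRead, kWrite };
    struct CopyState {
      bool valid;       // does this copy (host or device) hold current data?
      Permission perm;  // access most recently granted on this copy
    };
    // A read can be served by any valid copy, and granting kRead on one
    // side leaves the other side's copy valid -- no transfer is needed.
    // A write invalidates all other copies, so the next access from another
    // side triggers a transfer.
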
* Fixed linter and test errors.

- updated the lz4 plugin
- added ConstDeviceSpan to HostDeviceVector
- using device % dh::NVisibleDevices() for the physical device number,
  e.g. in calls to cudaSetDevice() (see the sketch below)

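The mapping itself is a single modulo; a sketch (dh::NVisibleDevices() is
named above, the dh::safe_cuda wrapper is an assumption):

    // Virtual device IDs may exceed the number of physically visible GPUs,
    // e.g. in tests; wrap them onto real devices before any CUDA call.
    int physical_device = device % dh::NVisibleDevices();
    dh::safe_cuda(cudaSetDevice(physical_device));
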
* Fixed explicit template instantiation errors for HostDeviceVector.

- replaced HostDeviceVector<unsigned int> with HostDeviceVector<int>
  (the instantiation pattern is sketched below)

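A sketch of the explicit-instantiation pattern this fixes (the exact list of
types is an assumption; it lives in the .cu file with the definitions):

    // Every T used across translation-unit boundaries needs an explicit
    // instantiation, since the member definitions are compiled by nvcc only.
    template class HostDeviceVector<bst_float>;
    template class HostDeviceVector<GradientPair>;
    template class HostDeviceVector<int>;  // replaces <unsigned int>
    template class HostDeviceVector<Entry>;
    template class HostDeviceVector<size_t>;
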
* Fixed HostDeviceVector tests that require multiple GPUs.

- added a mock set device handler; when set, it is called instead of
  cudaSetDevice() (usage sketched below)
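A sketch of how the hook can be used in a test (assuming a setter shaped
like SetCudaSetDeviceHandler(void (*)(int))):

    namespace {
    int last_device = -1;
    // Records the requested device instead of switching GPUs, so tests
    // written for several devices can run on a single-GPU machine.
    void MockSetDevice(int device) { last_device = device; }
    }  // anonymous namespace

    // In test setup:
    SetCudaSetDeviceHandler(&MockSetDevice);
    // ... exercise HostDeviceVector across several virtual devices ...
    SetCudaSetDeviceHandler(nullptr);  // restore the real cudaSetDevice()
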
canonizer authored and RAMitchell committed Aug 30, 2018
1 parent 58d783d commit 72cd151
Showing 45 changed files with 1,141 additions and 560 deletions.
84 changes: 48 additions & 36 deletions include/xgboost/data.h
@@ -17,6 +17,8 @@
 #include "./base.h"
 #include "../../src/common/span.h"
 
+#include "../../src/common/host_device_vector.h"
+
 namespace xgboost {
 // forward declare learner.
 class LearnerImpl;
@@ -41,7 +43,7 @@ class MetaInfo {
   /*! \brief number of nonzero entries in the data */
   uint64_t num_nonzero_{0};
   /*! \brief label of each instance */
-  std::vector<bst_float> labels_;
+  HostDeviceVector<bst_float> labels_;
   /*!
    * \brief specified root index of each instance,
    * can be used for multi task setting
@@ -53,15 +55,15 @@ class MetaInfo {
    */
   std::vector<bst_uint> group_ptr_;
   /*! \brief weights of each instance, optional */
-  std::vector<bst_float> weights_;
+  HostDeviceVector<bst_float> weights_;
   /*! \brief session-id of each instance, optional */
   std::vector<uint64_t> qids_;
   /*!
    * \brief initialized margins,
    * if specified, xgboost will start from this init margin
    * can be used to specify initial prediction to boost from.
    */
-  std::vector<bst_float> base_margin_;
+  HostDeviceVector<bst_float> base_margin_;
   /*! \brief version flag, used to check version of this info */
   static const int kVersion = 2;
   /*! \brief version that introduced qid field */
@@ -74,7 +76,7 @@ class MetaInfo {
    * \return The weight.
    */
   inline bst_float GetWeight(size_t i) const {
-    return weights_.size() != 0 ? weights_[i] : 1.0f;
+    return weights_.Size() != 0 ? weights_.HostVector()[i] : 1.0f;
   }
   /*!
    * \brief Get the root index of i-th instance.
@@ -86,12 +88,12 @@ class MetaInfo {
   }
   /*! \brief get sorted indexes (argsort) of labels by absolute value (used by cox loss) */
   inline const std::vector<size_t>& LabelAbsSort() const {
-    if (label_order_cache_.size() == labels_.size()) {
+    if (label_order_cache_.size() == labels_.Size()) {
       return label_order_cache_;
     }
-    label_order_cache_.resize(labels_.size());
+    label_order_cache_.resize(labels_.Size());
     std::iota(label_order_cache_.begin(), label_order_cache_.end(), 0);
-    const auto l = labels_;
+    const auto& l = labels_.HostVector();
     XGBOOST_PARALLEL_SORT(label_order_cache_.begin(), label_order_cache_.end(),
               [&l](size_t i1, size_t i2) {return std::abs(l[i1]) < std::abs(l[i2]);});
 
@@ -151,9 +153,9 @@ struct Entry {
  */
 class SparsePage {
  public:
-  std::vector<size_t> offset;
+  HostDeviceVector<size_t> offset;
   /*! \brief the data of the segments */
-  std::vector<Entry> data;
+  HostDeviceVector<Entry> data;
 
   size_t base_rowid;
 
@@ -162,8 +164,10 @@ class SparsePage {
 
   /*! \brief get i-th row from the batch */
   inline Inst operator[](size_t i) const {
-    return {data.data() + offset[i],
-            static_cast<Inst::index_type>(offset[i + 1] - offset[i])};
+    const auto& data_vec = data.HostVector();
+    const auto& offset_vec = offset.HostVector();
+    return {data_vec.data() + offset_vec[i],
+            static_cast<Inst::index_type>(offset_vec[i + 1] - offset_vec[i])};
   }
 
   /*! \brief constructor */
@@ -172,73 +176,81 @@ class SparsePage {
   }
   /*! \return number of instance in the page */
   inline size_t Size() const {
-    return offset.size() - 1;
+    return offset.Size() - 1;
   }
   /*! \return estimation of memory cost of this page */
   inline size_t MemCostBytes() const {
-    return offset.size() * sizeof(size_t) + data.size() * sizeof(Entry);
+    return offset.Size() * sizeof(size_t) + data.Size() * sizeof(Entry);
   }
   /*! \brief clear the page */
   inline void Clear() {
     base_rowid = 0;
-    offset.clear();
-    offset.push_back(0);
-    data.clear();
+    auto& offset_vec = offset.HostVector();
+    offset_vec.clear();
+    offset_vec.push_back(0);
+    data.HostVector().clear();
   }
 
   /*!
    * \brief Push row block into the page.
    * \param batch the row batch.
    */
   inline void Push(const dmlc::RowBlock<uint32_t>& batch) {
-    data.reserve(data.size() + batch.offset[batch.size] - batch.offset[0]);
-    offset.reserve(offset.size() + batch.size);
+    auto& data_vec = data.HostVector();
+    auto& offset_vec = offset.HostVector();
+    data_vec.reserve(data.Size() + batch.offset[batch.size] - batch.offset[0]);
+    offset_vec.reserve(offset.Size() + batch.size);
     CHECK(batch.index != nullptr);
     for (size_t i = 0; i < batch.size; ++i) {
-      offset.push_back(offset.back() + batch.offset[i + 1] - batch.offset[i]);
+      offset_vec.push_back(offset_vec.back() + batch.offset[i + 1] - batch.offset[i]);
     }
     for (size_t i = batch.offset[0]; i < batch.offset[batch.size]; ++i) {
       uint32_t index = batch.index[i];
       bst_float fvalue = batch.value == nullptr ? 1.0f : batch.value[i];
-      data.emplace_back(index, fvalue);
+      data_vec.emplace_back(index, fvalue);
    }
-    CHECK_EQ(offset.back(), data.size());
+    CHECK_EQ(offset_vec.back(), data.Size());
   }
   /*!
    * \brief Push a sparse page
    * \param batch the row page
    */
   inline void Push(const SparsePage &batch) {
-    size_t top = offset.back();
-    data.resize(top + batch.data.size());
-    std::memcpy(dmlc::BeginPtr(data) + top,
-                dmlc::BeginPtr(batch.data),
-                sizeof(Entry) * batch.data.size());
-    size_t begin = offset.size();
-    offset.resize(begin + batch.Size());
+    auto& data_vec = data.HostVector();
+    auto& offset_vec = offset.HostVector();
+    const auto& batch_offset_vec = batch.offset.HostVector();
+    const auto& batch_data_vec = batch.data.HostVector();
+    size_t top = offset_vec.back();
+    data_vec.resize(top + batch.data.Size());
+    std::memcpy(dmlc::BeginPtr(data_vec) + top,
+                dmlc::BeginPtr(batch_data_vec),
+                sizeof(Entry) * batch.data.Size());
+    size_t begin = offset.Size();
+    offset_vec.resize(begin + batch.Size());
     for (size_t i = 0; i < batch.Size(); ++i) {
-      offset[i + begin] = top + batch.offset[i + 1];
+      offset_vec[i + begin] = top + batch_offset_vec[i + 1];
     }
   }
   /*!
    * \brief Push one instance into page
    * \param inst an instance row
    */
   inline void Push(const Inst &inst) {
-    offset.push_back(offset.back() + inst.size());
-    size_t begin = data.size();
-    data.resize(begin + inst.size());
+    auto& data_vec = data.HostVector();
+    auto& offset_vec = offset.HostVector();
+    offset_vec.push_back(offset_vec.back() + inst.size());
+
+    size_t begin = data_vec.size();
+    data_vec.resize(begin + inst.size());
     if (inst.size() != 0) {
-      std::memcpy(dmlc::BeginPtr(data) + begin, inst.data(),
+      std::memcpy(dmlc::BeginPtr(data_vec) + begin, inst.data(),
                   sizeof(Entry) * inst.size());
     }
   }
 
-  size_t Size() { return offset.size() - 1; }
+  size_t Size() { return offset.Size() - 1; }
 };
 
 
 
 /*!
  * \brief This is data structure that user can pass to DMatrix::Create
  * to create a DMatrix for training, user can create this data structure
2 changes: 1 addition & 1 deletion include/xgboost/objective.h
@@ -44,7 +44,7 @@ class ObjFunction {
    * \param iteration current iteration number.
    * \param out_gpair output of get gradient, saves gradient and second order gradient in
    */
-  virtual void GetGradient(HostDeviceVector<bst_float>* preds,
+  virtual void GetGradient(const HostDeviceVector<bst_float>& preds,
                            const MetaInfo& info,
                            int iteration,
                            HostDeviceVector<GradientPair>* out_gpair) = 0;
11 changes: 6 additions & 5 deletions plugin/example/custom_obj.cc
@@ -33,21 +33,22 @@ class MyLogistic : public ObjFunction {
   void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
     param_.InitAllowUnknown(args);
   }
-  void GetGradient(HostDeviceVector<bst_float> *preds,
+  void GetGradient(const HostDeviceVector<bst_float> &preds,
                    const MetaInfo &info,
                    int iter,
                    HostDeviceVector<GradientPair> *out_gpair) override {
-    out_gpair->Resize(preds->Size());
-    std::vector<bst_float>& preds_h = preds->HostVector();
+    out_gpair->Resize(preds.Size());
+    const std::vector<bst_float>& preds_h = preds.HostVector();
     std::vector<GradientPair>& out_gpair_h = out_gpair->HostVector();
+    const std::vector<bst_float>& labels_h = info.labels_.HostVector();
     for (size_t i = 0; i < preds_h.size(); ++i) {
       bst_float w = info.GetWeight(i);
       // scale the negative examples!
-      if (info.labels_[i] == 0.0f) w *= param_.scale_neg_weight;
+      if (labels_h[i] == 0.0f) w *= param_.scale_neg_weight;
       // logistic transformation
       bst_float p = 1.0f / (1.0f + std::exp(-preds_h[i]));
       // this is the gradient
-      bst_float grad = (p - info.labels_[i]) * w;
+      bst_float grad = (p - labels_h[i]) * w;
       // this is the second order gradient
       bst_float hess = p * (1.0f - p) * w;
       out_gpair_h.at(i) = GradientPair(grad, hess);
51 changes: 28 additions & 23 deletions plugin/lz4/sparse_page_lz4_format.cc
@@ -177,15 +177,17 @@ class SparsePageLZ4Format : public SparsePageFormat {
   }
 
   bool Read(SparsePage* page, dmlc::SeekStream* fi) override {
-    if (!fi->Read(&(page->offset))) return false;
-    CHECK_NE(page->offset.size(), 0) << "Invalid SparsePage file";
+    auto& offset_vec = page->offset.HostVector();
+    auto& data_vec = page->data.HostVector();
+    if (!fi->Read(&(offset_vec))) return false;
+    CHECK_NE(offset_vec.size(), 0) << "Invalid SparsePage file";
     this->LoadIndexValue(fi);
 
-    page->data.resize(page->offset.back());
+    data_vec.resize(offset_vec.back());
     CHECK_EQ(index_.data.size(), value_.data.size());
-    CHECK_EQ(index_.data.size(), page->data.size());
-    for (size_t i = 0; i < page->data.size(); ++i) {
-      page->data[i] = Entry(index_.data[i] + min_index_, value_.data[i]);
+    CHECK_EQ(index_.data.size(), data_vec.size());
+    for (size_t i = 0; i < data_vec.size(); ++i) {
+      data_vec[i] = Entry(index_.data[i] + min_index_, value_.data[i]);
     }
     return true;
   }
@@ -195,47 +197,50 @@ class SparsePageLZ4Format : public SparsePageFormat {
             const std::vector<bst_uint>& sorted_index_set) override {
     if (!fi->Read(&disk_offset_)) return false;
     this->LoadIndexValue(fi);
 
-    page->offset.clear();
-    page->offset.push_back(0);
+    auto& offset_vec = page->offset.HostVector();
+    auto& data_vec = page->data.HostVector();
+    offset_vec.clear();
+    offset_vec.push_back(0);
     for (bst_uint cid : sorted_index_set) {
-      page->offset.push_back(
-          page->offset.back() + disk_offset_[cid + 1] - disk_offset_[cid]);
+      offset_vec.push_back(
+          offset_vec.back() + disk_offset_[cid + 1] - disk_offset_[cid]);
     }
-    page->data.resize(page->offset.back());
+    data_vec.resize(offset_vec.back());
     CHECK_EQ(index_.data.size(), value_.data.size());
     CHECK_EQ(index_.data.size(), disk_offset_.back());
 
     for (size_t i = 0; i < sorted_index_set.size(); ++i) {
       bst_uint cid = sorted_index_set[i];
-      size_t dst_begin = page->offset[i];
+      size_t dst_begin = offset_vec[i];
       size_t src_begin = disk_offset_[cid];
       size_t num = disk_offset_[cid + 1] - disk_offset_[cid];
       for (size_t j = 0; j < num; ++j) {
-        page->data[dst_begin + j] = Entry(
+        data_vec[dst_begin + j] = Entry(
            index_.data[src_begin + j] + min_index_, value_.data[src_begin + j]);
       }
     }
     return true;
   }
 
   void Write(const SparsePage& page, dmlc::Stream* fo) override {
-    CHECK(page.offset.size() != 0 && page.offset[0] == 0);
-    CHECK_EQ(page.offset.back(), page.data.size());
-    fo->Write(page.offset);
+    const auto& offset_vec = page.offset.HostVector();
+    const auto& data_vec = page.data.HostVector();
+    CHECK(offset_vec.size() != 0 && offset_vec[0] == 0);
+    CHECK_EQ(offset_vec.back(), data_vec.size());
+    fo->Write(offset_vec);
     min_index_ = page.base_rowid;
     fo->Write(&min_index_, sizeof(min_index_));
-    index_.data.resize(page.data.size());
-    value_.data.resize(page.data.size());
+    index_.data.resize(data_vec.size());
+    value_.data.resize(data_vec.size());
 
-    for (size_t i = 0; i < page.data.size(); ++i) {
-      bst_uint idx = page.data[i].index - min_index_;
+    for (size_t i = 0; i < data_vec.size(); ++i) {
+      bst_uint idx = data_vec[i].index - min_index_;
       CHECK_LE(idx, static_cast<bst_uint>(std::numeric_limits<StorageIndex>::max()))
          << "The storage index is chosen to limited to smaller equal than "
         << std::numeric_limits<StorageIndex>::max()
         << "min_index=" << min_index_;
       index_.data[i] = static_cast<StorageIndex>(idx);
-      value_.data[i] = page.data[i].fvalue;
+      value_.data[i] = data_vec[i].fvalue;
     }
 
    index_.InitCompressChunks(kChunkSize, kMaxChunk);
@@ -259,7 +264,7 @@ class SparsePageLZ4Format : public SparsePageFormat {
     raw_bytes_value_ += value_.RawBytes();
     encoded_bytes_index_ += index_.EncodedBytes();
     encoded_bytes_value_ += value_.EncodedBytes();
-    raw_bytes_ += page.offset.size() * sizeof(size_t);
+    raw_bytes_ += offset_vec.size() * sizeof(size_t);
   }
 
   inline void LoadIndexValue(dmlc::SeekStream* fi) {