Skip to content

Commit

Permalink
Add DeviceSpan in HostDeviceVector, use it in regression obj.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Aug 13, 2018
1 parent c8ad315 commit dbd61c2
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 7 deletions.
5 changes: 5 additions & 0 deletions src/common/host_device_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ GPUSet HostDeviceVector<T>::Devices() const { return GPUSet::Empty(); }
// CPU-only fallback (this translation unit reports no devices — see
// Devices() returning GPUSet::Empty()), so no device memory ever exists
// and there is no pointer to hand out.
template <typename T>
T* HostDeviceVector<T>::DevicePointer(int device) {
  return nullptr;
}

// CPU-only fallback: devices are never available in this build, so an
// empty span is returned regardless of the requested `device`.
template <typename T>
common::Span<T> HostDeviceVector<T>::DeviceSpan(int device) {
  return {};
}

// Grants mutable access to the backing host-side storage.
template <typename T>
std::vector<T>& HostDeviceVector<T>::HostVector() {
  return impl_->data_h_;
}

Expand Down
12 changes: 12 additions & 0 deletions src/common/host_device_vector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,13 @@ struct HostDeviceVectorImpl {
return shards_[devices_.Index(device)].data_.data().get();
}

// Returns a typed view over this vector's shard resident on `device`.
//
// Fix: the span length must be the per-device shard size, not Size().
// When the vector is distributed over several devices each shard holds
// only its own segment, so a span of Size() elements over the shard's
// base pointer would read past the end of the device allocation.
// DeviceSize() repeats the CHECK + LazySyncDevice done here; presumably
// LazySyncDevice is a no-op once the shard is synced — TODO confirm.
common::Span<T> DeviceSpan(int device) {
  CHECK(devices_.Contains(device));
  LazySyncDevice(device);
  return { shards_[devices_.Index(device)].data_.data().get(),
           static_cast<typename common::Span<T>::index_type>(DeviceSize(device)) };
}

size_t DeviceSize(int device) {
CHECK(devices_.Contains(device));
LazySyncDevice(device);
Expand Down Expand Up @@ -323,6 +330,11 @@ GPUSet HostDeviceVector<T>::Devices() const { return impl_->Devices(); }
// Raw pointer to this vector's shard on `device`; delegates to the impl.
template <typename T>
T* HostDeviceVector<T>::DevicePointer(int device) {
  return impl_->DevicePointer(device);
}

// Span view of this vector's shard on `device`; delegates to the impl.
template <typename T>
common::Span<T> HostDeviceVector<T>::DeviceSpan(int device) { return impl_->DeviceSpan(device); }

// Offset of the `device` shard within the full vector; delegates to the impl.
template <typename T>
size_t HostDeviceVector<T>::DeviceStart(int device) {
  return impl_->DeviceStart(device);
}

Expand Down
3 changes: 3 additions & 0 deletions src/common/host_device_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include <initializer_list>
#include <vector>

#include "span.h"

// only include thrust-related files if host_device_vector.h
// is included from a .cu file
#ifdef __CUDACC__
Expand Down Expand Up @@ -117,6 +119,7 @@ class HostDeviceVector {
size_t Size() const;
GPUSet Devices() const;
T* DevicePointer(int device);
common::Span<T> DeviceSpan(int device);

T* HostPointer() { return HostVector().data(); }
size_t DeviceStart(int device);
Expand Down
16 changes: 9 additions & 7 deletions src/objective/regression_obj_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <memory>
#include <vector>

#include "../common/span.h"
#include "../common/device_helpers.cuh"
#include "../common/host_device_vector.h"
#include "./regression_loss.h"
Expand Down Expand Up @@ -44,8 +45,8 @@ struct GPURegLossParam : public dmlc::Parameter<GPURegLossParam> {
// GPU kernel for gradient computation
template<typename Loss>
__global__ void get_gradient_k
(GradientPair *__restrict__ out_gpair, unsigned int *__restrict__ label_correct,
const float * __restrict__ preds, const float * __restrict__ labels,
(common::Span<GradientPair> out_gpair, common::Span<unsigned int> label_correct,
common::Span<const float> preds, common::Span<const float> labels,
const float * __restrict__ weights, int n, float scale_pos_weight) {
int i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= n)
Expand All @@ -56,14 +57,14 @@ __global__ void get_gradient_k
if (label == 1.0f)
w *= scale_pos_weight;
if (!Loss::CheckLabel(label))
atomicAnd(label_correct, 0);
atomicAnd(label_correct.data(), 0);
out_gpair[i] = GradientPair
(Loss::FirstOrderGradient(p, label) * w, Loss::SecondOrderGradient(p, label) * w);
}

// GPU kernel for predicate transformation
template<typename Loss>
__global__ void pred_transform_k(float * __restrict__ preds, int n) {
__global__ void pred_transform_k(common::Span<float> preds, int n) {
int i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= n)
return;
Expand Down Expand Up @@ -144,8 +145,8 @@ class GPURegLossObj : public ObjFunction {
size_t n = preds->DeviceSize(d);
if (n > 0) {
get_gradient_k<Loss><<<dh::DivRoundUp(n, block), block>>>
(out_gpair->DevicePointer(d), label_correct_.DevicePointer(d),
preds->DevicePointer(d), labels_.DevicePointer(d),
(out_gpair->DeviceSpan(d), label_correct_.DeviceSpan(d),
preds->DeviceSpan(d), labels_.DeviceSpan(d),
info.weights_.size() > 0 ? weights_.DevicePointer(d) : nullptr,
n, param_.scale_pos_weight);
dh::safe_cuda(cudaGetLastError());
Expand Down Expand Up @@ -180,7 +181,8 @@ class GPURegLossObj : public ObjFunction {
const int block = 256;
size_t n = preds->DeviceSize(d);
if (n > 0) {
pred_transform_k<Loss><<<dh::DivRoundUp(n, block), block>>>(preds->DevicePointer(d), n);
pred_transform_k<Loss><<<dh::DivRoundUp(n, block), block>>>(
preds->DeviceSpan(d), n);
dh::safe_cuda(cudaGetLastError());
}
dh::safe_cuda(cudaDeviceSynchronize());
Expand Down
22 changes: 22 additions & 0 deletions tests/cpp/common/test_host_device_vector.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*!
* Copyright 2018 XGBoost contributors
*/

#include <gtest/gtest.h>
#include "../../../src/common/host_device_vector.h"
#include "../../../src/common/device_helpers.cuh"

namespace xgboost {
namespace common {

// Sanity check: with a single-device shard, the device span aliases the
// same device memory as DevicePointer() and covers every element.
TEST(HostDeviceVector, Span) {
  HostDeviceVector<float> vec {1.0f, 2.0f, 3.0f, 4.0f};
  vec.Reshard(GPUSet{0, 1});
  auto const device_span = vec.DeviceSpan(0);
  ASSERT_EQ(vec.Size(), device_span.size());
  ASSERT_EQ(vec.DevicePointer(0), device_span.data());
}

} // namespace common
} // namespace xgboost

0 comments on commit dbd61c2

Please sign in to comment.