Implement transform to reduce CPU/GPU code duplication.
* Implement Transform class.
* Add tests for softmax.
* Use Transform in regression, softmax and hinge objectives, except for Cox.
* Mark old gpu objective functions deprecated.
* static_assert for softmax.
* Split up multi-gpu tests.
trivialfis committed Sep 30, 2018
1 parent 70d208d commit faed691
Showing 31 changed files with 1,513 additions and 997 deletions.
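The Transform class itself is added in files not included in the loaded portion of this diff. As a rough illustration of the idea only — every name below is hypothetical, not the API introduced by this commit — a single functor can be dispatched either to a CUDA kernel or to a plain host loop, so each objective is written once:

// Hypothetical sketch of a CPU/GPU dispatch helper; not the actual Transform API.
#include <cstddef>

#if defined(__CUDACC__)
template <typename Functor, typename T>
__global__ void ApplyKernel(Functor f, T* data, std::size_t n) {
  std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    f(i, data);  // the functor must be marked __host__ __device__
  }
}
#endif

template <typename Functor, typename T>
void Apply(Functor f, T* data, std::size_t n, bool use_gpu) {
#if defined(__CUDACC__)
  if (use_gpu) {
    ApplyKernel<<<static_cast<unsigned>((n + 255) / 256), 256>>>(f, data, n);
    cudaDeviceSynchronize();
    return;
  }
#endif
  for (std::size_t i = 0; i < n; ++i) {  // CPU fallback: same functor, host loop
    f(i, data);
  }
}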
4 changes: 3 additions & 1 deletion src/common/common.cc
@@ -1,9 +1,11 @@
/*!
* Copyright 2015 by Contributors
* Copyright 2015-2018 by Contributors
* \file common.cc
* \brief Enable all kinds of global variables in common.
*/
#include <dmlc/thread_local.h>

#include "common.h"
#include "./random.h"

namespace xgboost {
2 changes: 1 addition & 1 deletion src/common/common.cu
@@ -11,7 +11,7 @@ int AllVisibleImpl::AllVisible() {
// When compiled with CUDA but running on CPU only device,
// cudaGetDeviceCount will fail.
dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
} catch(const std::exception& e) {
} catch(const thrust::system::system_error& err) {
return 0;
}
return n_visgpus;
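The hunk above narrows the catch from std::exception to thrust::system::system_error when probing for devices. A self-contained sketch of the same pattern (the free function below is made up for illustration):

#include <cuda_runtime.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>

// Return the number of visible CUDA devices, or 0 on CPU-only machines where
// cudaGetDeviceCount fails even in a CUDA-enabled build.
inline int VisibleGpus() {
  int n_visgpus = 0;
  try {
    cudaError_t code = cudaGetDeviceCount(&n_visgpus);
    if (code != cudaSuccess) {
      throw thrust::system_error(code, thrust::cuda_category());
    }
  } catch (const thrust::system::system_error&) {
    return 0;
  }
  return n_visgpus;
}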
36 changes: 21 additions & 15 deletions src/common/common.h
@@ -1,5 +1,5 @@
/*!
* Copyright 2015 by Contributors
* Copyright 2015-2018 by Contributors
* \file common.h
* \brief Common utilities
*/
@@ -19,6 +19,13 @@
#if defined(__CUDACC__)
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>

#define WITH_CUDA() true

#else

#define WITH_CUDA() false

#endif

namespace dh {
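The new WITH_CUDA() macro expands to a plain true/false, so it can be used in ordinary control flow rather than in #ifdef blocks. A hedged example (this helper is illustrative, not part of the commit):

#include <dmlc/logging.h>

// Reject GPU settings at runtime when the binary was built without CUDA.
inline void CheckGpuSupport(int gpu_id) {
  if (gpu_id >= 0 && !WITH_CUDA()) {
    LOG(FATAL) << "GPU execution requested, but XGBoost was built without CUDA.";
  }
}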
@@ -29,11 +36,11 @@ namespace dh {
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)

inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
int line) {
int line) {
if (code != cudaSuccess) {
throw thrust::system_error(code, thrust::cuda_category(),
std::string{file} + "(" + // NOLINT
std::to_string(line) + ")");
LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
std::string{file} + ": " + // NOLINT
std::to_string(line)).what();
}
return code;
}
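With this change a failing CUDA call is reported through LOG(FATAL), carrying file and line context, instead of throwing a thrust::system_error. Typical use of the safe_cuda wrapper looks like the following (the function is a made-up example, not xgboost code):

// Copy a host buffer to device memory, aborting with file/line info on failure.
inline void CopyToDevice(float* device_dst, const float* host_src, size_t n) {
  dh::safe_cuda(cudaMemcpy(device_dst, host_src, n * sizeof(float),
                           cudaMemcpyHostToDevice));
}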
@@ -70,13 +77,13 @@ inline std::string ToString(const T& data) {
*/
class Range {
public:
using DifferenceType = int64_t;

class Iterator {
friend class Range;

public:
using DifferenceType = int64_t;

XGBOOST_DEVICE int64_t operator*() const { return i_; }
XGBOOST_DEVICE DifferenceType operator*() const { return i_; }
XGBOOST_DEVICE const Iterator &operator++() {
i_ += step_;
return *this;
@@ -97,8 +104,8 @@ class Range {
XGBOOST_DEVICE void Step(DifferenceType s) { step_ = s; }

protected:
XGBOOST_DEVICE explicit Iterator(int64_t start) : i_(start) {}
XGBOOST_DEVICE explicit Iterator(int64_t start, int step) :
XGBOOST_DEVICE explicit Iterator(DifferenceType start) : i_(start) {}
XGBOOST_DEVICE explicit Iterator(DifferenceType start, DifferenceType step) :
i_{start}, step_{step} {}

public:
@@ -109,9 +116,10 @@
XGBOOST_DEVICE Iterator begin() const { return begin_; } // NOLINT
XGBOOST_DEVICE Iterator end() const { return end_; } // NOLINT

XGBOOST_DEVICE Range(int64_t begin, int64_t end)
XGBOOST_DEVICE Range(DifferenceType begin, DifferenceType end)
: begin_(begin), end_(end) {}
XGBOOST_DEVICE Range(int64_t begin, int64_t end, Iterator::DifferenceType step)
XGBOOST_DEVICE Range(DifferenceType begin, DifferenceType end,
DifferenceType step)
: begin_(begin, step), end_(end) {}

XGBOOST_DEVICE bool operator==(const Range& other) const {
@@ -121,9 +129,7 @@
return !(*this == other);
}

XGBOOST_DEVICE void Step(Iterator::DifferenceType s) { begin_.Step(s); }

XGBOOST_DEVICE Iterator::DifferenceType GetStep() const { return begin_.step_; }
XGBOOST_DEVICE void Step(DifferenceType s) { begin_.Step(s); }

private:
Iterator begin_;
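The Range iterator now uses a single DifferenceType alias throughout, and the unused GetStep() accessor is dropped. A small host-side usage sketch (assuming Range lives in xgboost::common as in this header; the same class is usable in device code via XGBOOST_DEVICE):

#include <cstdint>
#include <vector>

// Collect 0, 2, 4, 6, 8: the third constructor argument is the step, and the
// end value (10) is an exact multiple of it, so iteration stops at end.
inline std::vector<int64_t> EvenNumbersBelowTen() {
  std::vector<int64_t> out;
  for (auto i : xgboost::common::Range{0, 10, 2}) {
    out.push_back(i);
  }
  return out;
}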
21 changes: 20 additions & 1 deletion src/common/device_helpers.cuh
@@ -9,6 +9,7 @@
#include <xgboost/logging.h>

#include "common.h"
#include "span.h"

#include <algorithm>
#include <chrono>
@@ -955,7 +956,7 @@ class SaveCudaContext {
// cudaGetDevice will fail.
try {
safe_cuda(cudaGetDevice(&saved_device_));
} catch (thrust::system::system_error & err) {
} catch (const thrust::system::system_error & err) {
saved_device_ = -1;
}
func();
@@ -1035,4 +1036,22 @@ ReduceT ReduceShards(std::vector<ShardT> *shards, FunctionT f) {
};
return std::accumulate(sums.begin(), sums.end(), ReduceT());
}

template <typename T,
typename IndexT = typename xgboost::common::Span<T>::index_type>
xgboost::common::Span<T> ToSpan(
thrust::device_vector<T>& vec,
IndexT offset = 0,
IndexT size = -1) {
size = size == -1 ? vec.size() : size;
CHECK_LE(offset + size, vec.size());
return {vec.data().get() + offset, static_cast<IndexT>(size)};
}

template <typename T>
xgboost::common::Span<T> ToSpan(thrust::device_vector<T>& vec,
size_t offset, size_t size) {
using IndexT = typename xgboost::common::Span<T>::index_type;
return ToSpan(vec, static_cast<IndexT>(offset), static_cast<IndexT>(size));
}
} // namespace dh
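Example use of the new ToSpan helpers in a CUDA translation unit: a thrust::device_vector is exposed to device code as a non-owning Span, either whole or as an (offset, size) sub-range.

#include <thrust/device_vector.h>

inline void ToSpanExample() {
  thrust::device_vector<float> buf(100, 1.0f);
  auto whole = dh::ToSpan(buf);                          // all 100 elements
  auto tail  = dh::ToSpan(buf, size_t{50}, size_t{50});  // elements [50, 100)
  static_cast<void>(whole);   // spans would normally be passed to a kernel
  static_cast<void>(tail);
}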
22 changes: 14 additions & 8 deletions src/common/host_device_vector.cu
@@ -116,6 +116,7 @@ struct HostDeviceVectorImpl {
int ndevices = vec_->distribution_.devices_.Size();
start_ = vec_->distribution_.ShardStart(new_size, index_);
proper_size_ = vec_->distribution_.ShardProperSize(new_size, index_);
// The size on this device.
size_t size_d = vec_->distribution_.ShardSize(new_size, index_);
SetDevice();
data_.resize(size_d);
@@ -230,15 +231,15 @@ struct HostDeviceVectorImpl {
CHECK(devices.Contains(device));
LazySyncDevice(device, GPUAccess::kWrite);
return {shards_[devices.Index(device)].data_.data().get(),
static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
}

common::Span<const T> ConstDeviceSpan(int device) {
GPUSet devices = distribution_.devices_;
CHECK(devices.Contains(device));
LazySyncDevice(device, GPUAccess::kRead);
return {shards_[devices.Index(device)].data_.data().get(),
static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
}

size_t DeviceSize(int device) {
@@ -289,7 +290,6 @@ struct HostDeviceVectorImpl {
data_h_.size() * sizeof(T),
cudaMemcpyHostToDevice));
} else {
//
dh::ExecuteShards(&shards_, [&](DeviceShard& shard) { shard.GatherTo(begin); });
}
}
@@ -304,14 +304,20 @@

void Copy(HostDeviceVectorImpl<T>* other) {
CHECK_EQ(Size(), other->Size());
// Data is on host.
if (perm_h_.CanWrite() && other->perm_h_.CanWrite()) {
std::copy(other->data_h_.begin(), other->data_h_.end(), data_h_.begin());
} else {
CHECK(distribution_ == other->distribution_);
dh::ExecuteIndexShards(&shards_, [&](int i, DeviceShard& shard) {
shard.Copy(&other->shards_[i]);
});
return;
}
// Data is on device;
if (distribution_ != other->distribution_) {
distribution_ = GPUDistribution();
Reshard(other->Distribution());
size_d_ = other->size_d_;
}
dh::ExecuteIndexShards(&shards_, [&](int i, DeviceShard& shard) {
shard.Copy(&other->shards_[i]);
});
}

void Copy(const std::vector<T>& other) {
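A short usage sketch for the changed Copy() behaviour (assuming HostDeviceVector's public Resize/Copy interface): when the source shards live under a different GPU distribution, the destination is now resharded to match instead of failing the old distribution check.

#include "host_device_vector.h"  // src/common/, as in this commit

inline void CopyVector(const xgboost::HostDeviceVector<float>& src,
                       xgboost::HostDeviceVector<float>* dst) {
  dst->Resize(src.Size());  // Copy() requires equal sizes
  dst->Copy(src);           // adopts src's device distribution when needed
}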
7 changes: 5 additions & 2 deletions src/common/host_device_vector.h
@@ -111,8 +111,11 @@ class GPUDistribution {
}

friend bool operator==(const GPUDistribution& a, const GPUDistribution& b) {
return a.devices_ == b.devices_ && a.granularity_ == b.granularity_ &&
a.overlap_ == b.overlap_ && a.offsets_ == b.offsets_;
bool const res = a.devices_ == b.devices_ &&
a.granularity_ == b.granularity_ &&
a.overlap_ == b.overlap_ &&
a.offsets_ == b.offsets_;
return res;
}

friend bool operator!=(const GPUDistribution& a, const GPUDistribution& b) {
36 changes: 23 additions & 13 deletions src/common/math.h
@@ -11,6 +11,7 @@
#include <vector>
#include <cmath>
#include <algorithm>
#include <utility>
#include "avx_helpers.h"

namespace xgboost {
@@ -29,22 +30,31 @@ inline avx::Float8 Sigmoid(avx::Float8 x) {
}

/*!
* \brief do inplace softmax transformaton on p_rec
* \param p_rec the input/output vector of the values.
* \brief Do inplace softmax transformaton on start to end
*
* \tparam Iterator Input iterator type
*
* \param start Start iterator of input
* \param end end iterator of input
*/
inline void Softmax(std::vector<float>* p_rec) {
std::vector<float> &rec = *p_rec;
float wmax = rec[0];
for (size_t i = 1; i < rec.size(); ++i) {
wmax = std::max(rec[i], wmax);
template <typename Iterator>
XGBOOST_DEVICE inline void Softmax(Iterator start, Iterator end) {
static_assert(std::is_same<bst_float,
typename std::remove_reference<
decltype(std::declval<Iterator>().operator*())>::type
>::value,
"Values should be of type bst_float");
bst_float wmax = *start;
for (Iterator i = start+1; i != end; ++i) {
wmax = fmaxf(*i, wmax);
}
double wsum = 0.0f;
for (float & elem : rec) {
elem = std::exp(elem - wmax);
wsum += elem;
for (Iterator i = start; i != end; ++i) {
*i = expf(*i - wmax);
wsum += *i;
}
for (float & elem : rec) {
elem /= static_cast<float>(wsum);
for (Iterator i = start; i != end; ++i) {
*i /= static_cast<float>(wsum);
}
}
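An example call of the templated Softmax above (bst_float is xgboost's float alias; the static_assert rejects any other element type):

#include <vector>
#include <xgboost/base.h>  // bst_float

inline void SoftmaxExample() {
  std::vector<xgboost::bst_float> scores{1.0f, 2.0f, 3.0f};
  xgboost::common::Softmax(scores.begin(), scores.end());
  // scores is now a probability distribution summing to 1, with the largest
  // mass on the last element.
}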

@@ -56,7 +66,7 @@
* \tparam Iterator The type of the iterator.
*/
template<typename Iterator>
inline Iterator FindMaxIndex(Iterator begin, Iterator end) {
XGBOOST_DEVICE inline Iterator FindMaxIndex(Iterator begin, Iterator end) {
Iterator maxit = begin;
for (Iterator it = begin; it != end; ++it) {
if (*it > *maxit) maxit = it;
10 changes: 5 additions & 5 deletions src/common/span.h
@@ -49,7 +49,7 @@
*
* https://github.com/Microsoft/GSL/pull/664
*
* FIXME: Group these MSVC workarounds into a manageable place.
* TODO(trivialfis): Group these MSVC workarounds into a manageable place.
*/
#if defined(_MSC_VER) && _MSC_VER < 1910

@@ -68,7 +68,7 @@ namespace xgboost {
namespace common {

// Usual logging facility is not available inside device code.
// FIXME: Make dmlc check more generic.
// TODO(trivialfis): Make dmlc check more generic.
#define KERNEL_CHECK(cond) \
do { \
if (!(cond)) { \
@@ -104,11 +104,11 @@ constexpr detail::ptrdiff_t dynamic_extent = -1;  // NOLINT

enum class byte : unsigned char {}; // NOLINT

namespace detail {

template <class ElementType, detail::ptrdiff_t Extent = dynamic_extent>
template <class ElementType, detail::ptrdiff_t Extent>
class Span;

namespace detail {

template <typename SpanType, bool IsConst>
class SpanIterator {
using ElementType = typename SpanType::element_type;
[Diffs for the remaining 23 changed files not shown.]
