Support F order for the tensor type. #8872

Merged · 2 commits · Mar 7, 2023
160 changes: 123 additions & 37 deletions include/xgboost/linalg.h
@@ -15,11 +15,11 @@

#include <algorithm>
#include <cassert>
-#include <cinttypes>  // std::int32_t
-#include <cstddef>    // std::size_t
+#include <cinttypes>  // for int32_t
+#include <cstddef>    // for size_t
#include <limits>
#include <string>
-#include <tuple>
+#include <tuple>  // for make_tuple
#include <type_traits>
#include <utility>
#include <vector>
@@ -37,8 +37,7 @@
#endif // defined (__CUDA__) || defined(__NVCC__)
#endif // LINALG_HD

-namespace xgboost {
-namespace linalg {
+namespace xgboost::linalg {
namespace detail {

struct ArrayInterfaceHandler {
@@ -86,7 +85,7 @@ template <typename I>
struct RangeTag {
I beg;
I end;
-  constexpr size_t Size() const { return end - beg; }
+  [[nodiscard]] constexpr size_t Size() const { return end - beg; }
};

/**
Expand Down Expand Up @@ -158,14 +157,34 @@ inline LINALG_HD int Popc(uint64_t v) {
#endif // compiler
}

+template <std::size_t D, typename Head>
+LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head) {
+  static_assert(std::is_integral<std::remove_reference_t<Head>>::value, "Invalid index type.");
+  arr[D - 1] = head;
+}
+
+/**
+ * \brief Convert index from parameter pack to C-style array.
+ */
+template <std::size_t D, typename Head, typename... Rest>
+LINALG_HD void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...index) {
+  static_assert(sizeof...(Rest) < D, "Index overflow.");
+  static_assert(std::is_integral<std::remove_reference_t<Head>>::value, "Invalid index type.");
+  arr[D - sizeof...(Rest) - 1] = head;
+  IndexToArr(arr, std::forward<Rest>(index)...);
+}

template <class T, std::size_t N, std::size_t... Idx>
-constexpr auto Arr2Tup(T (&arr)[N], std::index_sequence<Idx...>) {
+constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
return std::make_tuple(arr[Idx]...);
}

+/**
+ * \brief Convert C-style array to std::tuple.
+ */
template <class T, std::size_t N>
-constexpr auto Arr2Tup(T (&arr)[N]) {
-  return Arr2Tup(arr, std::make_index_sequence<N>{});
+constexpr auto ArrToTuple(T (&arr)[N]) {
+  return ArrToTuple(arr, std::make_index_sequence<N>{});
}

// uint division optimization inspired by the CIndexer in cupy. Division operation is
@@ -188,7 +207,7 @@ LINALG_HD auto UnravelImpl(I idx, common::Span<size_t const, D> shape) {
}
}
index[0] = idx;
-  return Arr2Tup(index);
+  return ArrToTuple(index);
}

template <size_t dim, typename I, int32_t D>
@@ -252,6 +271,11 @@ constexpr detail::RangeTag<I> Range(I beg, I end) {
return {beg, end};
}

+enum Order : std::uint8_t {
+  kC,  // Row major
+  kF,  // Col major
+};

/**
* \brief A tensor view with static type and dimension. It implements indexing and slicing.
*
@@ -377,7 +401,11 @@ class TensorView {
* \param device Device ordinal
*/
template <typename I, int32_t D>
-  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], int32_t device)
+  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device)
+      : TensorView{data, shape, device, Order::kC} {}
+
+  template <typename I, int32_t D>
+  LINALG_HD TensorView(common::Span<T> data, I const (&shape)[D], std::int32_t device, Order order)
: data_{data}, ptr_{data_.data()}, device_{device} {
static_assert(D > 0 && D <= kDim, "Invalid shape.");
// shape
@@ -386,7 +414,19 @@ class TensorView {
shape_[i] = 1;
}
// stride
-    detail::CalcStride(shape_, stride_);
+    switch (order) {
+      case Order::kC: {
+        detail::CalcStride(shape_, stride_);
+        break;
+      }
+      case Order::kF: {
+        detail::CalcStride<kDim, true>(shape_, stride_);
+        break;
+      }
+      default: {
+        SPAN_CHECK(false);
+      }
+    }
// size
this->CalcSize();
}
@@ -490,17 +530,17 @@ class TensorView {
/**
* \brief Number of items in the tensor.
*/
-  LINALG_HD size_t Size() const { return size_; }
+  LINALG_HD [[nodiscard]] std::size_t Size() const { return size_; }
/**
* \brief Whether this is a contiguous array, both C and F contiguous returns true.
*/
-  LINALG_HD bool Contiguous() const {
+  LINALG_HD [[nodiscard]] bool Contiguous() const {
return data_.size() == this->Size() || this->CContiguous() || this->FContiguous();
}
/**
* \brief Whether it's a c-contiguous array.
*/
-  LINALG_HD bool CContiguous() const {
+  LINALG_HD [[nodiscard]] bool CContiguous() const {
StrideT stride;
static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
// It's contiguous if the stride can be calculated from shape.
@@ -510,7 +550,7 @@
/**
* \brief Whether it's a f-contiguous array.
*/
-  LINALG_HD bool FContiguous() const {
+  LINALG_HD [[nodiscard]] bool FContiguous() const {
StrideT stride;
static_assert(std::is_same<decltype(stride), decltype(stride_)>::value);
// It's contiguous if the stride can be calculated from shape.
@@ -530,16 +570,38 @@
/**
* \brief Constructor for automatic type deduction.
*/
-template <typename Container, typename I, int32_t D,
-          std::enable_if_t<!common::detail::IsSpan<Container>::value> * = nullptr>
-auto MakeTensorView(Container &data, I const (&shape)[D], int32_t device) {  // NOLINT
+template <typename Container, typename... S,
+          std::enable_if_t<!common::detail::IsSpan<Container>::value &&
+                           !std::is_pointer_v<Container>> * = nullptr>
+auto MakeTensorView(Context const *ctx, Container &data, S &&...shape) {  // NOLINT
using T = typename Container::value_type;
-  return TensorView<T, D>{data, shape, device};
+  std::size_t in_shape[sizeof...(S)];
+  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
+  return TensorView<T, sizeof...(S)>{data, in_shape, ctx->gpu_id};
}

+template <typename T, typename... S>
+LINALG_HD auto MakeTensorView(std::int32_t device, common::Span<T> data, S &&...shape) {
+  std::size_t in_shape[sizeof...(S)];
+  detail::IndexToArr(in_shape, std::forward<S>(shape)...);
+  return TensorView<T, sizeof...(S)>{data, in_shape, device};
+}

-template <typename T, typename I, int32_t D>
-LINALG_HD auto MakeTensorView(common::Span<T> data, I const (&shape)[D], int32_t device) {
-  return TensorView<T, D>{data, shape, device};
+template <typename T, typename... S>
+auto MakeTensorView(Context const *ctx, common::Span<T> data, S &&...shape) {
+  return MakeTensorView(ctx->gpu_id, data, std::forward<S>(shape)...);
}

+template <typename T, typename... S>
+auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
+  auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
+  return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
+}
+
+template <typename T, typename... S>
+auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
+  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
+  return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
+}

/**
Expand All @@ -559,6 +621,13 @@ LINALG_HD auto UnravelIndex(size_t idx, std::size_t const (&shape)[D]) {
return UnravelIndex(idx, common::Span<std::size_t const, D>(shape));
}

+template <typename... S>
+LINALG_HD auto UnravelIndex(std::size_t idx, S... shape) {
+  std::size_t s[sizeof...(S)];
+  detail::IndexToArr(s, shape...);
+  return UnravelIndex(idx, common::Span<std::size_t const, sizeof...(S)>(s));
+}

/**
* \brief A view over a vector, specialization of Tensor
*
@@ -676,6 +745,7 @@ class Tensor {
private:
HostDeviceVector<T> data_;
ShapeT shape_{0};
+  Order order_{Order::kC};

template <typename I, std::int32_t D>
void Initialize(I const (&shape)[D], std::int32_t device) {
@@ -701,11 +771,12 @@ class Tensor {
* See \ref TensorView for parameters of this constructor.
*/
template <typename I, int32_t D>
-  explicit Tensor(I const (&shape)[D], int32_t device)
-      : Tensor{common::Span<I const, D>{shape}, device} {}
+  explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
+      : Tensor{common::Span<I const, D>{shape}, device, order} {}

template <typename I, size_t D>
-  explicit Tensor(common::Span<I const, D> shape, int32_t device) {
+  explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
+      : order_{order} {
// No device unroll as this is a host only function.
std::copy(shape.data(), shape.data() + D, shape_);
for (auto i = D; i < kDim; ++i) {
@@ -724,16 +795,18 @@
* Initialize from 2 host iterators.
*/
template <typename It, typename I, int32_t D>
-  explicit Tensor(It begin, It end, I const (&shape)[D], int32_t device) {
+  explicit Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order = kC)
+      : order_{order} {
auto &h_vec = data_.HostVector();
h_vec.insert(h_vec.begin(), begin, end);
// shape
this->Initialize(shape, device);
}

template <typename I, int32_t D>
-  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D],
-                  int32_t device = Context::kCpuId) {
+  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], std::int32_t device,
+                  Order order = kC)
+      : order_{order} {
auto &h_vec = data_.HostVector();
h_vec = data;
// shape
@@ -763,27 +836,27 @@ class Tensor {
if (device >= 0) {
data_.SetDevice(device);
auto span = data_.DeviceSpan();
-      return {span, shape_, device};
+      return {span, shape_, device, order_};
} else {
auto span = data_.HostSpan();
-      return {span, shape_, device};
+      return {span, shape_, device, order_};
}
}
TensorView<T const, kDim> View(int32_t device) const {
if (device >= 0) {
data_.SetDevice(device);
auto span = data_.ConstDeviceSpan();
-      return {span, shape_, device};
+      return {span, shape_, device, order_};
} else {
auto span = data_.ConstHostSpan();
-      return {span, shape_, device};
+      return {span, shape_, device, order_};
}
}

auto HostView() const { return this->View(-1); }
auto HostView() { return this->View(-1); }

-  size_t Size() const { return data_.Size(); }
+  [[nodiscard]] size_t Size() const { return data_.Size(); }
auto Shape() const { return common::Span<size_t const, kDim>{shape_}; }
auto Shape(size_t i) const { return shape_[i]; }

@@ -837,12 +910,26 @@ class Tensor {
void Reshape(size_t (&shape)[D]) {
this->Reshape(common::Span<size_t const, D>{shape});
}
+  /**
+   * \brief Get a host view on the slice.
+   */
+  template <typename... S>
+  auto Slice(S &&...slices) const {
+    return this->HostView().Slice(std::forward<S>(slices)...);
+  }
+  /**
+   * \brief Get a host view on the slice.
+   */
+  template <typename... S>
+  auto Slice(S &&...slices) {
+    return this->HostView().Slice(std::forward<S>(slices)...);
+  }

/**
* \brief Set device ordinal for this tensor.
*/
void SetDevice(int32_t device) const { data_.SetDevice(device); }
-  int32_t DeviceIdx() const { return data_.DeviceIdx(); }
+  [[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
};

template <typename T>
@@ -900,8 +987,7 @@ void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
shape[0] = l->Shape(0) + r.Shape(0);
});
}
-}  // namespace linalg
-}  // namespace xgboost
+}  // namespace xgboost::linalg

#if defined(LINALG_HD)
#undef LINALG_HD
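To make the stride handling concrete: `Order::kC` keeps the existing row-major strides, while `Order::kF` asks `detail::CalcStride<kDim, true>` for column-major ones. Below is a minimal standalone sketch of the two stride rules the new `switch` selects between; `CStrides` and `FStrides` are illustrative helpers, not xgboost's `detail::CalcStride`.

```cpp
#include <array>
#include <cstddef>
#include <iostream>

// C order (row major): the last axis is contiguous, so stride[i] is the
// product of the shape entries after i.
template <std::size_t D>
std::array<std::size_t, D> CStrides(std::array<std::size_t, D> const &shape) {
  std::array<std::size_t, D> stride{};
  std::size_t s = 1;
  for (std::size_t i = D; i-- > 0;) {
    stride[i] = s;
    s *= shape[i];
  }
  return stride;
}

// F order (column major): the first axis is contiguous, so stride[i] is the
// product of the shape entries before i.
template <std::size_t D>
std::array<std::size_t, D> FStrides(std::array<std::size_t, D> const &shape) {
  std::array<std::size_t, D> stride{};
  std::size_t s = 1;
  for (std::size_t i = 0; i < D; ++i) {
    stride[i] = s;
    s *= shape[i];
  }
  return stride;
}

int main() {
  std::array<std::size_t, 2> shape{3, 4};
  auto c = CStrides(shape);  // {4, 1}: element (i, j) lives at i * 4 + j
  auto f = FStrides(shape);  // {1, 3}: element (i, j) lives at i + j * 3
  std::cout << c[0] << "," << c[1] << " / " << f[0] << "," << f[1] << "\n";
}
```

For a 3x4 matrix this yields C strides `{4, 1}` and F strides `{1, 3}`; with `kF` the first axis becomes the contiguous one, which is exactly what the unchanged `FContiguous()` check tests for.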
5 changes: 2 additions & 3 deletions src/metric/elementwise_metric.cu
@@ -451,9 +451,8 @@ class QuantileError : public MetricNoCache {
auto alpha = ctx->IsCPU() ? alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();
std::size_t n_targets = preds.Size() / info.num_row_ / alpha_.Size();
CHECK_NE(n_targets, 0);
-    auto y_predt = linalg::MakeTensorView(
-        ctx->IsCPU() ? preds.ConstHostSpan() : preds.ConstDeviceSpan(),
-        {static_cast<std::size_t>(info.num_row_), alpha_.Size(), n_targets}, ctx->gpu_id);
+    auto y_predt = linalg::MakeTensorView(ctx, &preds, static_cast<std::size_t>(info.num_row_),
+                                          alpha_.Size(), n_targets);

info.weights_.SetDevice(ctx->gpu_id);
common::OptionalWeights weight{ctx->IsCPU() ? info.weights_.ConstHostSpan()
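The new call site passes the shape extents as a plain parameter pack instead of an array literal plus a device ordinal. A self-contained sketch of the pack-to-array plumbing that enables this, re-created from the `IndexToArr`/`ArrToTuple` hunk in `linalg.h` (same names and logic, but without the `LINALG_HD` macro or the xgboost namespaces):

```cpp
#include <cstddef>
#include <iostream>
#include <tuple>
#include <type_traits>
#include <utility>

// Base case: write the last extent into the final slot of the array.
template <std::size_t D, typename Head>
void IndexToArr(std::size_t (&arr)[D], Head head) {
  static_assert(std::is_integral_v<std::remove_reference_t<Head>>, "Invalid index type.");
  arr[D - 1] = head;
}

// Recursive case: the slot for `head` is determined by how many extents
// are still left in the pack.
template <std::size_t D, typename Head, typename... Rest>
void IndexToArr(std::size_t (&arr)[D], Head head, Rest &&...rest) {
  static_assert(sizeof...(Rest) < D, "Index overflow.");
  arr[D - sizeof...(Rest) - 1] = head;
  IndexToArr(arr, std::forward<Rest>(rest)...);
}

template <class T, std::size_t N, std::size_t... Idx>
constexpr auto ArrToTuple(T (&arr)[N], std::index_sequence<Idx...>) {
  return std::make_tuple(arr[Idx]...);
}

// Convert a C-style array to a std::tuple.
template <class T, std::size_t N>
constexpr auto ArrToTuple(T (&arr)[N]) {
  return ArrToTuple(arr, std::make_index_sequence<N>{});
}

int main() {
  std::size_t shape[3];
  IndexToArr(shape, 2, 3, 4);    // shape is now {2, 3, 4}
  auto tup = ArrToTuple(shape);  // std::tuple holding (2, 3, 4)
  std::cout << std::get<0>(tup) << " " << std::get<1>(tup) << " " << std::get<2>(tup) << "\n";
}
```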
15 changes: 5 additions & 10 deletions src/objective/adaptive.cc
@@ -23,9 +23,7 @@
#include "xgboost/span.h" // Span
#include "xgboost/tree_model.h" // RegTree

-namespace xgboost {
-namespace obj {
-namespace detail {
+namespace xgboost::obj::detail {
void EncodeTreeLeafHost(Context const* ctx, RegTree const& tree,
std::vector<bst_node_t> const& position, std::vector<size_t>* p_nptr,
std::vector<bst_node_t>* p_nidx, std::vector<size_t>* p_ridx) {
@@ -98,8 +96,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit
auto const& h_node_idx = nidx;
auto const& h_node_ptr = nptr;
CHECK_LE(h_node_ptr.back(), info.num_row_);
-  auto h_predt = linalg::MakeTensorView(predt.ConstHostSpan(),
-                                        {info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
+  auto h_predt = linalg::MakeTensorView(ctx, predt.ConstHostSpan(), info.num_row_,
+                                        predt.Size() / info.num_row_);

// loop over each leaf
common::ParallelFor(quantiles.size(), ctx->Threads(), [&](size_t k) {
@@ -138,11 +136,8 @@ void UpdateTreeLeafHost(Context const* ctx, std::vector<bst_node_t> const& posit

#if !defined(XGBOOST_USE_CUDA)
void UpdateTreeLeafDevice(Context const*, common::Span<bst_node_t const>, std::int32_t,
-                          MetaInfo const&, float learning_rate, HostDeviceVector<float> const&,
-                          float, RegTree*) {
+                          MetaInfo const&, float, HostDeviceVector<float> const&, float, RegTree*) {
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)
-}  // namespace detail
-}  // namespace obj
-}  // namespace xgboost
+}  // namespace xgboost::obj::detail
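For readers new to F order: with shape `(rows, cols)`, element `(i, j)` of an F-ordered tensor sits at offset `i + j * rows` rather than the C-order `i * cols + j`. A tiny illustration with a plain `std::vector` standing in for the tensor's storage (not the `Tensor` class itself):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // The 2x3 matrix {{1, 2, 3}, {4, 5, 6}} stored column by column,
  // i.e. in F order: columns (1,4), (2,5), (3,6) laid out back to back.
  std::size_t rows = 2, cols = 3;
  std::vector<int> f_data = {1, 4, 2, 5, 3, 6};
  for (std::size_t i = 0; i < rows; ++i) {
    for (std::size_t j = 0; j < cols; ++j) {
      std::cout << f_data[i + j * rows] << ' ';  // prints 1 2 3 / 4 5 6
    }
    std::cout << '\n';
  }
}
```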
4 changes: 2 additions & 2 deletions src/objective/adaptive.cu
@@ -157,8 +157,8 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
HostDeviceVector<float> quantiles;
predt.SetDevice(ctx->gpu_id);

-  auto d_predt = linalg::MakeTensorView(predt.ConstDeviceSpan(),
-                                        {info.num_row_, predt.Size() / info.num_row_}, ctx->gpu_id);
+  auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), info.num_row_,
+                                        predt.Size() / info.num_row_);
CHECK_LT(group_idx, d_predt.Shape(1));
auto t_predt = d_predt.Slice(linalg::All(), group_idx);
auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), IdxY(info, group_idx));
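One more piece the PR touches: a variadic `UnravelIndex` overload in `linalg.h` that converts a flat offset back into per-axis coordinates. A sketch of what it computes for the three-dimensional row-major case, without the unsigned-division optimization the real `UnravelImpl` uses (`Unravel3` is a hypothetical name for illustration):

```cpp
#include <cstddef>
#include <iostream>
#include <tuple>

// Peel off the fastest-moving (last) axis first: in row-major layout the
// flat offset is i * d1 * d2 + j * d2 + k.
std::tuple<std::size_t, std::size_t, std::size_t> Unravel3(std::size_t idx, std::size_t d0,
                                                           std::size_t d1, std::size_t d2) {
  std::size_t k = idx % d2;
  idx /= d2;
  std::size_t j = idx % d1;
  idx /= d1;
  std::size_t i = idx % d0;  // idx itself, whenever idx < d0 * d1 * d2
  return {i, j, k};
}

int main() {
  auto [i, j, k] = Unravel3(17, 2, 3, 4);  // 17 = 1*12 + 1*4 + 1 -> (1, 1, 1)
  std::cout << i << " " << j << " " << k << "\n";
}
```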