Use ellpack for prediction only when sparsepage doesn't exist. (#5504)
trivialfis authored Apr 10, 2020
1 parent ad826e9 commit 6671b42
Showing 35 changed files with 166 additions and 116 deletions.
6 changes: 0 additions & 6 deletions include/xgboost/gbm.h
@@ -181,12 +181,6 @@ class GradientBooster : public Model, public Configurable {
                               const std::string& name,
                               GenericParameter const* generic_param,
                               LearnerModelParam const* learner_model_param);
-
-  static void AssertGPUSupport() {
-#ifndef XGBOOST_USE_CUDA
-    LOG(FATAL) << "XGBoost version not compiled with GPU support.";
-#endif  // XGBOOST_USE_CUDA
-  }
 };
 
 /*!
8 changes: 4 additions & 4 deletions src/c_api/c_api.cc
@@ -85,7 +85,7 @@ XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs,
                                                      int nthread,
                                                      DMatrixHandle* out) {
   API_BEGIN();
-  LOG(FATAL) << "XGBoost not compiled with CUDA";
+  common::AssertGPUSupport();
   API_END();
 }
 
@@ -94,7 +94,7 @@ XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
                                               int nthread,
                                               DMatrixHandle* out) {
   API_BEGIN();
-  LOG(FATAL) << "XGBoost not compiled with CUDA";
+  common::AssertGPUSupport();
   API_END();
 }
 
@@ -521,7 +521,7 @@ XGB_DLL int XGBoosterPredictFromArrayInterfaceColumns(BoosterHandle handle,
                                                       float const** out_result) {
   API_BEGIN();
   CHECK_HANDLE();
-  LOG(FATAL) << "XGBoost not compiled with CUDA.";
+  common::AssertGPUSupport();
   API_END();
 }
 XGB_DLL int XGBoosterPredictFromArrayInterface(BoosterHandle handle,
@@ -535,7 +535,7 @@ XGB_DLL int XGBoosterPredictFromArrayInterface(BoosterHandle handle,
                                                const float **out_result) {
   API_BEGIN();
   CHECK_HANDLE();
-  LOG(FATAL) << "XGBoost not compiled with CUDA.";
+  common::AssertGPUSupport();
   API_END();
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
7 changes: 7 additions & 0 deletions src/common/common.h
@@ -147,6 +147,13 @@ class Range {
 };
 
 int AllVisibleGPUs();
+
+inline void AssertGPUSupport() {
+#ifndef XGBOOST_USE_CUDA
+  LOG(FATAL) << "XGBoost version not compiled with GPU support.";
+#endif  // XGBOOST_USE_CUDA
+}
+
 }  // namespace common
 }  // namespace xgboost
 #endif  // XGBOOST_COMMON_COMMON_H_
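This is the heart of the refactor: `AssertGPUSupport` moves from `GradientBooster` into `xgboost::common`, so every CPU-only stub above (the C API, `MetaInfo::SetInfo`, gblinear) shares one failure path instead of hand-rolled `LOG(FATAL)` strings. A minimal self-contained sketch of the idiom, with `std::cerr` plus `std::abort` standing in for dmlc's `LOG(FATAL)` and a hypothetical `PredictFromCudaColumnar` standing in for the real stubs:

```cpp
// Sketch of the stub pattern this commit standardizes. In a build without
// XGBOOST_USE_CUDA, GPU-only entry points compile to a stub that fails
// loudly instead of silently doing nothing.
#include <cstdlib>
#include <iostream>

namespace xgboost {
namespace common {
inline void AssertGPUSupport() {
#ifndef XGBOOST_USE_CUDA
  // The real code base uses dmlc's LOG(FATAL), which logs and then aborts.
  std::cerr << "XGBoost version not compiled with GPU support." << std::endl;
  std::abort();
#endif  // XGBOOST_USE_CUDA
}
}  // namespace common
}  // namespace xgboost

// A CPU-only stub for a GPU entry point then reduces to a single call
// (hypothetical function name, for illustration only):
void PredictFromCudaColumnar() { xgboost::common::AssertGPUSupport(); }

int main() { PredictFromCudaColumnar(); }  // aborts in a CPU-only build
```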
2 changes: 1 addition & 1 deletion src/data/data.cc
@@ -293,7 +293,7 @@ void MetaInfo::SetInfo(const char* key, const void* dptr, DataType dtype, size_t
 
 #if !defined(XGBOOST_USE_CUDA)
 void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
-  LOG(FATAL) << "XGBoost version is not compiled with GPU support";
+  common::AssertGPUSupport();
 }
 #endif  // !defined(XGBOOST_USE_CUDA)
 
3 changes: 2 additions & 1 deletion src/gbm/gblinear.cc
@@ -22,6 +22,7 @@
 
 #include "gblinear_model.h"
 #include "../common/timer.h"
+#include "../common/common.h"
 
 namespace xgboost {
 namespace gbm {
@@ -68,7 +69,7 @@ class GBLinear : public GradientBooster {
     updater_->Configure(cfg);
     monitor_.Init("GBLinear");
     if (param_.updater == "gpu_coord_descent") {
-      this->AssertGPUSupport();
+      common::AssertGPUSupport();
     }
   }
18 changes: 11 additions & 7 deletions src/gbm/gbtree.cc
@@ -172,7 +172,7 @@ void GBTree::ConfigureUpdaters() {
       tparam_.updater_seq = "grow_quantile_histmaker";
       break;
     case TreeMethod::kGPUHist: {
-      this->AssertGPUSupport();
+      common::AssertGPUSupport();
       tparam_.updater_seq = "grow_gpu_hist";
       break;
     }
@@ -391,17 +391,21 @@ GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
       CHECK(gpu_predictor_);
       return gpu_predictor_;
 #else
-      this->AssertGPUSupport();
+      common::AssertGPUSupport();
 #endif  // defined(XGBOOST_USE_CUDA)
     }
     CHECK(cpu_predictor_);
     return cpu_predictor_;
   }
 
-  auto on_device =
-      f_dmat &&
-      (f_dmat->PageExists<EllpackPage>() ||
-       (*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead());
+  // Data comes from Device DMatrix.
+  auto is_ellpack = f_dmat && f_dmat->PageExists<EllpackPage>() &&
+                    !f_dmat->PageExists<SparsePage>();
+  // Data comes from device memory, like CuDF or CuPy.
+  auto is_from_device =
+      f_dmat && f_dmat->PageExists<SparsePage>() &&
+      (*(f_dmat->GetBatches<SparsePage>().begin())).data.DeviceCanRead();
+  auto on_device = is_ellpack || is_from_device;
 
   // Use GPU Predictor if data is already on device and gpu_id is set.
   if (on_device && generic_param_->gpu_id >= 0) {
@@ -434,7 +438,7 @@ GBTree::GetPredictor(HostDeviceVector<float> const *out_pred,
     CHECK(gpu_predictor_);
     return gpu_predictor_;
 #else
-    this->AssertGPUSupport();
+    common::AssertGPUSupport();
     return cpu_predictor_;
 #endif  // defined(XGBOOST_USE_CUDA)
   }
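Condensed, the new rule in `GBTree::GetPredictor` is: run prediction on the GPU only when `gpu_id` is set and the data is either ellpack-only (it arrived through a `DeviceDMatrix`, which never materializes a `SparsePage`) or a `SparsePage` whose backing storage is already readable on device (cuDF/CuPy input). The previous predicate counted *any* existing `EllpackPage`, even when a host `SparsePage` was also present. A standalone sketch of the new predicate, written against the interfaces used in the hunk above (illustrative, not the verbatim source; assumes the xgboost headers that declare `DMatrix`, `SparsePage`, and `EllpackPage`):

```cpp
// Sketch: should prediction run on device? Mirrors GetPredictor's logic
// after this commit.
bool ShouldUseGPUPredictor(xgboost::DMatrix* f_dmat, int gpu_id) {
  if (gpu_id < 0 || f_dmat == nullptr) {
    return false;  // no device configured, or no data to inspect
  }
  // Case 1: data came from a DeviceDMatrix, so only an EllpackPage exists.
  bool const is_ellpack = f_dmat->PageExists<xgboost::EllpackPage>() &&
                          !f_dmat->PageExists<xgboost::SparsePage>();
  // Case 2: data came from device memory (cuDF/CuPy); the sparse page's
  // backing HostDeviceVector is already readable on device.
  bool const is_from_device =
      f_dmat->PageExists<xgboost::SparsePage>() &&
      (*f_dmat->GetBatches<xgboost::SparsePage>().begin()).data.DeviceCanRead();
  return is_ellpack || is_from_device;
}
```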
16 changes: 8 additions & 8 deletions src/predictor/gpu_predictor.cu
@@ -348,21 +348,21 @@ class GPUPredictor : public xgboost::Predictor {
     model_.Init(model, tree_begin, tree_end, generic_param_->gpu_id);
     out_preds->SetDevice(generic_param_->gpu_id);
 
-    if (dmat->PageExists<EllpackPage>()) {
+    if (dmat->PageExists<SparsePage>()) {
+      size_t batch_offset = 0;
+      for (auto &batch : dmat->GetBatches<SparsePage>()) {
+        this->PredictInternal(batch, model.learner_model_param->num_feature,
+                              out_preds, batch_offset);
+        batch_offset += batch.Size() * model.learner_model_param->num_output_group;
+      }
+    } else {
       size_t batch_offset = 0;
       for (auto const& page : dmat->GetBatches<EllpackPage>()) {
         this->PredictInternal(
             page.Impl()->GetDeviceAccessor(generic_param_->gpu_id), out_preds,
             batch_offset);
         batch_offset += page.Impl()->n_rows;
       }
-    } else {
-      size_t batch_offset = 0;
-      for (auto &batch : dmat->GetBatches<SparsePage>()) {
-        this->PredictInternal(batch, model.learner_model_param->num_feature,
-                              out_preds, batch_offset);
-        batch_offset += batch.Size() * model.learner_model_param->num_output_group;
-      }
     }
   }
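This hunk is the predictor-side half of the commit title: iterate `SparsePage` batches whenever a sparse page exists, and take the ellpack path only when it doesn't (the `DeviceDMatrix` case). The dispatch reduced to its skeleton, with hypothetical `PredictSparse`/`PredictEllpack` standing in for the two `PredictInternal` overloads:

```cpp
// Skeleton of the reordered dispatch (sketch only; PredictSparse and
// PredictEllpack are illustrative stand-ins, not code-base functions).
void PredictSparse(xgboost::SparsePage const& batch);    // stand-in
void PredictEllpack(xgboost::EllpackPage const& page);   // stand-in

void DevicePredict(xgboost::DMatrix* dmat) {
  if (dmat->PageExists<xgboost::SparsePage>()) {
    // Preferred path: CSR batches, whether resident on host or device.
    for (auto const& batch : dmat->GetBatches<xgboost::SparsePage>()) {
      PredictSparse(batch);
    }
  } else {
    // Fallback: the compressed, device-only ELLPACK pages.
    for (auto const& page : dmat->GetBatches<xgboost::EllpackPage>()) {
      PredictEllpack(page);
    }
  }
}
```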
4 changes: 2 additions & 2 deletions tests/cpp/c_api/test_c_api.cc
@@ -82,7 +82,7 @@ TEST(CAPI, Version) {
 
 TEST(CAPI, ConfigIO) {
   size_t constexpr kRows = 10;
-  auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatix();
+  auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatrix();
   std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
   std::vector<bst_float> labels(kRows);
   for (size_t i = 0; i < labels.size(); ++i) {
@@ -115,7 +115,7 @@ TEST(CAPI, JsonModelIO) {
   size_t constexpr kRows = 10;
   dmlc::TemporaryDirectory tempdir;
 
-  auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatix();
+  auto p_dmat = RandomDataGenerator(kRows, 10, 0).GenerateDMatrix();
   std::vector<std::shared_ptr<DMatrix>> mat {p_dmat};
   std::vector<bst_float> labels(kRows);
   for (size_t i = 0; i < labels.size(); ++i) {
6 changes: 3 additions & 3 deletions tests/cpp/common/test_column_matrix.cc
@@ -13,7 +13,7 @@ TEST(DenseColumn, Test) {
       static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
       static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};
   for (size_t max_num_bin : max_num_bins) {
-    auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatix();
+    auto dmat = RandomDataGenerator(100, 10, 0.0).GenerateDMatrix();
     GHistIndexMatrix gmat;
     gmat.Init(dmat.get(), max_num_bin);
     ColumnMatrix column_matrix;
@@ -61,7 +61,7 @@ TEST(SparseColumn, Test) {
       static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
       static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2};
   for (size_t max_num_bin : max_num_bins) {
-    auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatix();
+    auto dmat = RandomDataGenerator(100, 1, 0.85).GenerateDMatrix();
     GHistIndexMatrix gmat;
     gmat.Init(dmat.get(), max_num_bin);
     ColumnMatrix column_matrix;
@@ -102,7 +102,7 @@ TEST(DenseColumnWithMissing, Test) {
       static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1,
       static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 2 };
   for (size_t max_num_bin : max_num_bins) {
-    auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatix();
+    auto dmat = RandomDataGenerator(100, 1, 0.5).GenerateDMatrix();
     GHistIndexMatrix gmat;
     gmat.Init(dmat.get(), max_num_bin);
     ColumnMatrix column_matrix;
16 changes: 8 additions & 8 deletions tests/cpp/common/test_hist_util.cc
@@ -128,7 +128,7 @@ TEST(CutsBuilder, SearchGroupInd) {
   size_t constexpr kRows = 17;
   size_t constexpr kCols = 15;
 
-  auto p_mat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+  auto p_mat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   std::vector<bst_int> group(kNumGroups);
   group[0] = 2;
@@ -155,7 +155,7 @@ TEST(SparseCuts, SingleThreadedBuild) {
   size_t constexpr kCols = 31;
   size_t constexpr kBins = 256;
 
-  auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+  auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   common::GHistIndexMatrix hmat;
   hmat.Init(p_fmat.get(), kBins);
@@ -206,12 +206,12 @@ TEST(SparseCuts, MultiThreadedBuild) {
   };
 
   {
-    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
     Compare(p_fmat.get());
   }
 
   {
-    auto p_fmat = RandomDataGenerator(kRows, kCols, 0.0001).GenerateDMatix();
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0.0001).GenerateDMatrix();
     Compare(p_fmat.get());
   }
 
@@ -360,7 +360,7 @@ TEST(HistUtil, IndexBinBound) {
 
   size_t bin_id = 0;
   for (auto max_bin : bin_sizes) {
-    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
     common::GHistIndexMatrix hmat;
     hmat.Init(p_fmat.get(), max_bin);
@@ -381,7 +381,7 @@ TEST(HistUtil, SparseIndexBinBound) {
 
   size_t bin_id = 0;
   for (auto max_bin : bin_sizes) {
-    auto p_fmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatix();
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatrix();
     common::GHistIndexMatrix hmat;
     hmat.Init(p_fmat.get(), max_bin);
     EXPECT_EQ(expected_bin_type_sizes[bin_id++], hmat.index.GetBinTypeSize());
@@ -404,7 +404,7 @@ TEST(HistUtil, IndexBinData) {
   size_t constexpr kCols = 10;
 
   for (auto max_bin : kBinSizes) {
-    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
     common::GHistIndexMatrix hmat;
     hmat.Init(p_fmat.get(), max_bin);
     uint32_t* offsets = hmat.index.Offset();
@@ -434,7 +434,7 @@ TEST(HistUtil, SparseIndexBinData) {
   size_t constexpr kCols = 10;
 
   for (auto max_bin : bin_sizes) {
-    auto p_fmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatix();
+    auto p_fmat = RandomDataGenerator(kRows, kCols, 0.2).GenerateDMatrix();
     common::GHistIndexMatrix hmat;
     hmat.Init(p_fmat.get(), max_bin);
     EXPECT_EQ(hmat.index.Offset(), nullptr);
2 changes: 1 addition & 1 deletion tests/cpp/data/test_adapter.cc
@@ -68,7 +68,7 @@ TEST(Adapter, CSCAdapterColsMoreThanRows) {
 }
 
 TEST(CAPI, DMatrixSliceAdapterFromSimpleDMatrix) {
-  auto p_dmat = RandomDataGenerator(6, 2, 1.0).GenerateDMatix();
+  auto p_dmat = RandomDataGenerator(6, 2, 1.0).GenerateDMatrix();
 
   std::vector<int> ridx_set = {1, 3, 5};
   data::DMatrixSliceAdapter adapter(p_dmat.get(),
4 changes: 2 additions & 2 deletions tests/cpp/data/test_device_dmatrix.cu
@@ -23,7 +23,7 @@ TEST(DeviceDMatrix, RowMajor) {
   auto adapter = common::AdapterFromData(x_device, num_rows, num_columns);
 
   data::DeviceDMatrix dmat(&adapter,
-                           std::numeric_limits<float>::quiet_NaN(), 1, 256);
+                           std::numeric_limits<float>::quiet_NaN(), 1, 256);
 
   auto &batch = *dmat.GetBatches<EllpackPage>({0, 256, 0}).begin();
   auto impl = batch.Impl();
@@ -60,7 +60,7 @@ TEST(DeviceDMatrix, RowMajorMissing) {
   EXPECT_EQ(iterator[1], impl->GetDeviceAccessor(0).NullValue());
   EXPECT_EQ(iterator[5], impl->GetDeviceAccessor(0).NullValue());
   // null values get placed after valid values in a row
-  EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(0).NullValue());
+  EXPECT_EQ(iterator[7], impl->GetDeviceAccessor(0).NullValue());
   EXPECT_EQ(dmat.Info().num_col_, num_columns);
   EXPECT_EQ(dmat.Info().num_row_, num_rows);
   EXPECT_EQ(dmat.Info().num_nonzero_, num_rows*num_columns-3);
2 changes: 1 addition & 1 deletion tests/cpp/data/test_ellpack_page.cu
@@ -17,7 +17,7 @@ namespace xgboost {
 TEST(EllpackPage, EmptyDMatrix) {
   constexpr int kNRows = 0, kNCols = 0, kMaxBin = 256;
   constexpr float kSparsity = 0;
-  auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatix();
+  auto dmat = RandomDataGenerator(kNRows, kNCols, kSparsity).GenerateDMatrix();
   auto& page = *dmat->GetBatches<EllpackPage>({0, kMaxBin}).begin();
   auto impl = page.Impl();
   ASSERT_EQ(impl->row_stride, 0);
2 changes: 1 addition & 1 deletion tests/cpp/data/test_simple_dmatrix.cc
@@ -220,7 +220,7 @@ TEST(SimpleDMatrix, FromFile) {
 TEST(SimpleDMatrix, Slice) {
   const int kRows = 6;
   const int kCols = 2;
-  auto p_dmat = RandomDataGenerator(kRows, kCols, 1.0).GenerateDMatix();
+  auto p_dmat = RandomDataGenerator(kRows, kCols, 1.0).GenerateDMatrix();
   auto &labels = p_dmat->Info().labels_.HostVector();
   auto &weights = p_dmat->Info().weights_.HostVector();
   auto &base_margin = p_dmat->Info().base_margin_.HostVector();
7 changes: 4 additions & 3 deletions tests/cpp/gbm/test_gbtree.cc
@@ -55,7 +55,7 @@ TEST(GBTree, WrongUpdater) {
   size_t constexpr kRows = 17;
   size_t constexpr kCols = 15;
 
-  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   p_dmat->Info().labels_.Resize(kRows);
 
@@ -67,10 +67,11 @@ TEST(GBTree, WrongUpdater) {
 
 #ifdef XGBOOST_USE_CUDA
 TEST(GBTree, ChoosePredictor) {
+  // The test ensures data don't get pulled into device.
   size_t constexpr kRows = 17;
   size_t constexpr kCols = 15;
 
-  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
   p_dmat->Info().labels_.Resize(kRows);
@@ -195,7 +196,7 @@ TEST(Dart, JsonIO) {
 TEST(Dart, Prediction) {
   size_t constexpr kRows = 16, kCols = 10;
 
-  auto p_mat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+  auto p_mat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   std::vector<bst_float> labels (kRows);
   for (size_t i = 0; i < kRows; ++i) {
6 changes: 3 additions & 3 deletions tests/cpp/helpers.cc
@@ -260,8 +260,8 @@ void RandomDataGenerator::GenerateCSR(
 }
 
 std::shared_ptr<DMatrix>
-RandomDataGenerator::GenerateDMatix(bool with_label, bool float_label,
-                                    size_t classes) const {
+RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
+                                     size_t classes) const {
   HostDeviceVector<float> data;
   HostDeviceVector<bst_row_t> rptrs;
   HostDeviceVector<bst_feature_t> columns;
@@ -399,7 +399,7 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
   std::unique_ptr<GradientBooster> gbm {
       GradientBooster::Create(name, generic_param, learner_model_param)};
   gbm->Configure(kwargs);
-  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatix();
+  auto p_dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix();
 
   std::vector<float> labels(kRows);
   for (size_t i = 0; i < kRows; ++i) {
17 changes: 17 additions & 0 deletions tests/cpp/helpers.cu
@@ -0,0 +1,17 @@
+#include "helpers.h"
+#include "../../src/data/device_adapter.cuh"
+#include "../../src/data/device_dmatrix.h"
+
+namespace xgboost {
+std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix(bool with_label,
+                                                                    bool float_label,
+                                                                    size_t classes) {
+  std::vector<HostDeviceVector<float>> storage(cols_);
+  std::string arr = this->GenerateColumnarArrayInterface(&storage);
+  auto adapter = data::CudfAdapter(arr);
+  std::shared_ptr<DMatrix> m {
+    new data::DeviceDMatrix{&adapter,
+                            std::numeric_limits<float>::quiet_NaN(), 1, 256}};
+  return m;
+}
+}  // namespace xgboost
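The new helper builds a `DeviceDMatrix` from a columnar (cuDF-style) array interface, which is precisely the "ellpack-only" input the predictor changes target: such a matrix carries an `EllpackPage` but no `SparsePage`. A hedged sketch of how a GPU test might exercise it (the test name and assertions are illustrative, not part of this commit):

```cpp
#include <gtest/gtest.h>
#include "helpers.h"  // RandomDataGenerator, per the file above

namespace xgboost {
// Hypothetical test: a DeviceDMatrix never materializes a SparsePage, so
// prediction on it must take the EllpackPage path added in gbtree.cc.
TEST(RandomDataGenerator, DeviceDMatrixIsEllpackOnly) {
  auto p_fmat = RandomDataGenerator(32, 8, 0.0f)
                    .GenerateDeviceDMatrix(true, true, 1);
  ASSERT_TRUE(p_fmat->PageExists<EllpackPage>());
  ASSERT_FALSE(p_fmat->PageExists<SparsePage>());
}
}  // namespace xgboost
```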