[WIP] Fix clang-tidy. #4110

Closed
wants to merge 1 commit into from
2 changes: 1 addition & 1 deletion src/common/device_helpers.cuh
@@ -771,7 +771,7 @@ template <typename T>
typename std::iterator_traits<T>::value_type SumReduction(
dh::CubMemory &tmp_mem, T in, int nVals) {
using ValueT = typename std::iterator_traits<T>::value_type;
- size_t tmpSize;
+ size_t tmpSize {0};
ValueT *dummy_out = nullptr;
dh::safe_cuda(cub::DeviceReduce::Sum(nullptr, tmpSize, in, dummy_out, nVals));
// Allocate small extra memory for the return value
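
The change above brace-initializes `tmpSize` because CUB's two-phase API only writes the required temporary-storage size on the first call with a null buffer; clang-tidy flags the previously uninitialized local. A minimal standalone sketch of the pattern, assuming plain CUDA/CUB outside XGBoost (`SumOnDevice`, `d_in`, and `num_items` are illustrative names, not project code):

```cpp
#include <cub/cub.cuh>

// Two-phase CUB reduction: the first call with a null temp buffer only
// reports the required size; the second call performs the reduction.
float SumOnDevice(const float* d_in, int num_items) {
  size_t temp_bytes {0};   // written by the sizing call below
  float* d_out = nullptr;  // dummy output pointer for the sizing call
  cub::DeviceReduce::Sum(nullptr, temp_bytes, d_in, d_out, num_items);

  // Allocate the temp storage plus a little extra for the result itself,
  // mirroring the "small extra memory for the return value" comment above.
  void* d_temp = nullptr;
  cudaMalloc(&d_temp, temp_bytes + sizeof(float));
  d_out = reinterpret_cast<float*>(static_cast<char*>(d_temp) + temp_bytes);
  cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, num_items);

  float result = 0.0f;
  cudaMemcpy(&result, d_out, sizeof(float), cudaMemcpyDeviceToHost);
  cudaFree(d_temp);
  return result;
}
```
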
29 changes: 19 additions & 10 deletions src/common/host_device_vector.cu
@@ -153,6 +153,13 @@ struct HostDeviceVectorImpl {
}
}

+ T* Raw() { return data_.data().get(); }
+ size_t Start() const { return start_; }
+ size_t DataSize() const { return data_.size(); }
+ Permissions& Perm() { return perm_d_; }
+ Permissions const& Perm() const { return perm_d_; }
+
+ private:
int device_;
thrust::device_vector<T> data_;
// cached vector size
@@ -215,41 +222,42 @@ struct HostDeviceVectorImpl {
T* DevicePointer(int device) {
CHECK(distribution_.devices_.Contains(device));
LazySyncDevice(device, GPUAccess::kWrite);
- return shards_.at(distribution_.devices_.Index(device)).data_.data().get();
+ return shards_.at(distribution_.devices_.Index(device)).Raw();
}

const T* ConstDevicePointer(int device) {
CHECK(distribution_.devices_.Contains(device));
LazySyncDevice(device, GPUAccess::kRead);
- return shards_.at(distribution_.devices_.Index(device)).data_.data().get();
+ return shards_.at(distribution_.devices_.Index(device)).Raw();
}

common::Span<T> DeviceSpan(int device) {
GPUSet devices = distribution_.devices_;
CHECK(devices.Contains(device));
LazySyncDevice(device, GPUAccess::kWrite);
- return {shards_.at(devices.Index(device)).data_.data().get(),
- static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
+ return {shards_.at(devices.Index(device)).Raw(),
+ static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
}

common::Span<const T> ConstDeviceSpan(int device) {
GPUSet devices = distribution_.devices_;
CHECK(devices.Contains(device));
LazySyncDevice(device, GPUAccess::kRead);
- return {shards_.at(devices.Index(device)).data_.data().get(),
- static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
+ using SpanInd = typename common::Span<const T>::index_type;
+ return {shards_.at(devices.Index(device)).Raw(),
+ static_cast<SpanInd>(DeviceSize(device))};
}

size_t DeviceSize(int device) {
CHECK(distribution_.devices_.Contains(device));
LazySyncDevice(device, GPUAccess::kRead);
- return shards_.at(distribution_.devices_.Index(device)).data_.size();
+ return shards_.at(distribution_.devices_.Index(device)).DataSize();
}

size_t DeviceStart(int device) {
CHECK(distribution_.devices_.Contains(device));
LazySyncDevice(device, GPUAccess::kRead);
- return shards_.at(distribution_.devices_.Index(device)).start_;
+ return shards_.at(distribution_.devices_.Index(device)).Start();
}

thrust::device_ptr<T> tbegin(int device) { // NOLINT
@@ -388,7 +396,7 @@ struct HostDeviceVectorImpl {
if (perm_h_.CanRead()) {
// data is present, just need to deny access to the device
dh::ExecuteIndexShards(&shards_, [&](int idx, DeviceShard& shard) {
- shard.perm_d_.DenyComplementary(access);
+ shard.Perm().DenyComplementary(access);
});
perm_h_.Grant(access);
return;
@@ -411,9 +419,10 @@
bool DeviceCanAccess(int device, GPUAccess access) {
GPUSet devices = distribution_.Devices();
if (!devices.Contains(device)) { return false; }
- return shards_.at(devices.Index(device)).perm_d_.CanAccess(access);
+ return shards_.at(devices.Index(device)).Perm().CanAccess(access);
}

+ private:
std::vector<T> data_h_;
Permissions perm_h_;
// the total size of the data stored on the devices
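
For context, the accessors added above (`Raw()`, `Start()`, `DataSize()`, `Perm()`) replace direct reads of `shard.data_`, `shard.start_`, and `shard.perm_d_`, which is what allows the shard's data members to move under `private:`. A reduced sketch of the pattern, with a plain `std::vector` standing in for `thrust::device_vector` and a hypothetical `Shard` class rather than the real `DeviceShard`:

```cpp
#include <cstddef>
#include <vector>

// Data members are private; callers go through thin inline accessors
// instead of touching the fields directly.
template <typename T>
class Shard {
 public:
  T* Raw() { return data_.data(); }                      // raw pointer for device APIs
  std::size_t Start() const { return start_; }           // global offset of this shard
  std::size_t DataSize() const { return data_.size(); }  // elements held locally

 private:
  std::vector<T> data_;
  std::size_t start_ {0};
};
```

The real shard additionally exposes `Perm()` in const and non-const flavours so the permission check in `DeviceCanAccess` keeps working from outside the class.
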
33 changes: 17 additions & 16 deletions src/linear/updater_gpu_coordinate.cu
@@ -154,8 +154,8 @@ class GPUCoordinateUpdater : public LinearUpdater {
void Init(
const std::vector<std::pair<std::string, std::string>> &args) override {
tparam_.InitAllowUnknown(args);
- selector.reset(FeatureSelector::Create(tparam_.feature_selector));
- monitor.Init("GPUCoordinateUpdater");
+ selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
+ monitor_.Init("GPUCoordinateUpdater");
}

void LazyInitShards(DMatrix *p_fmat,
@@ -196,38 +196,38 @@ class GPUCoordinateUpdater : public LinearUpdater {
void Update(HostDeviceVector<GradientPair> *in_gpair, DMatrix *p_fmat,
gbm::GBLinearModel *model, double sum_instance_weight) override {
tparam_.DenormalizePenalties(sum_instance_weight);
- monitor.Start("LazyInitShards");
+ monitor_.Start("LazyInitShards");
this->LazyInitShards(p_fmat, model->param);
- monitor.Stop("LazyInitShards");
+ monitor_.Stop("LazyInitShards");

- monitor.Start("UpdateGpair");
+ monitor_.Start("UpdateGpair");
// Update gpair
dh::ExecuteIndexShards(&shards, [&](int idx, std::unique_ptr<DeviceShard>& shard) {
if (!shard->IsEmpty()) {
shard->UpdateGpair(in_gpair->ConstHostVector(), model->param);
}
});
- monitor.Stop("UpdateGpair");
+ monitor_.Stop("UpdateGpair");

- monitor.Start("UpdateBias");
+ monitor_.Start("UpdateBias");
this->UpdateBias(p_fmat, model);
- monitor.Stop("UpdateBias");
+ monitor_.Stop("UpdateBias");
// prepare for updating the weights
- selector->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
- tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
- coord_param_.top_k);
- monitor.Start("UpdateFeature");
+ selector_->Setup(*model, in_gpair->ConstHostVector(), p_fmat,
+ tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm,
+ coord_param_.top_k);
+ monitor_.Start("UpdateFeature");
for (auto group_idx = 0; group_idx < model->param.num_output_group;
++group_idx) {
for (auto i = 0U; i < model->param.num_feature; i++) {
- auto fidx = selector->NextFeature(
+ auto fidx = selector_->NextFeature(
i, *model, group_idx, in_gpair->ConstHostVector(), p_fmat,
tparam_.reg_alpha_denorm, tparam_.reg_lambda_denorm);
if (fidx < 0) break;
this->UpdateFeature(fidx, group_idx, &in_gpair->HostVector(), model);
}
}
- monitor.Stop("UpdateFeature");
+ monitor_.Stop("UpdateFeature");
}

void UpdateBias(DMatrix *p_fmat, gbm::GBLinearModel *model) {
@@ -288,12 +288,13 @@ class GPUCoordinateUpdater : public LinearUpdater {
});
}

+ private:
// training parameter
LinearTrainParam tparam_;
CoordinateParam coord_param_;
GPUDistribution dist_;
- std::unique_ptr<FeatureSelector> selector;
- common::Monitor monitor;
+ std::unique_ptr<FeatureSelector> selector_;
+ common::Monitor monitor_;

std::vector<std::unique_ptr<DeviceShard>> shards;
};
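
The renames in this file (`selector` to `selector_`, `monitor` to `monitor_`) plus the new `private:` section follow the member-naming convention that clang-tidy's readability checks enforce: private data members carry a trailing underscore, and compile-time constants take a leading `k` (as in the `kNRows`/`kNCols` rename further down). A hypothetical before/after sketch, not taken from the updater itself:

```cpp
// After the convention is applied: private members end in "_", constants
// start with "k", and both sit under an explicit `private:` section.
class GradientUpdater {
 public:
  int StepCount() const { return step_count_; }

 private:
  static constexpr int kMaxSteps = 100;  // was `maxSteps`
  int step_count_ {0};                   // was `step_count` in public scope
};
```
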
16 changes: 8 additions & 8 deletions src/tree/updater_gpu.cu
@@ -102,7 +102,7 @@ struct AddByKey {
* @param instIds instance index buffer
* @return the expected gradient value
*/
- HOST_DEV_INLINE GradientPair get(int id,
+ HOST_DEV_INLINE GradientPair Get(int id,
common::Span<const GradientPair> vals,
common::Span<const int> instIds) {
id = instIds[id];
@@ -129,7 +129,7 @@ __global__ void CubScanByKeyL1(
int tid = blockIdx.x * BLKDIM_L1L3 + threadIdx.x;
if (tid < size) {
myKey = Abs2UniqueKey(tid, keys, colIds, nodeStart, nUniqKeys);
- myValue = get(tid, vals, instIds);
+ myValue = Get(tid, vals, instIds);
} else {
myKey = kNoneKey;
myValue = {};
@@ -201,19 +201,19 @@ __global__ void CubScanByKeyL3(common::Span<GradientPair> sums,
int previousKey =
tid == 0 ? kNoneKey
: Abs2UniqueKey(tid - 1, keys, colIds, nodeStart, nUniqKeys);
- GradientPair myValue = scans[tid];
+ GradientPair my_value = scans[tid];
__syncthreads();
if (blockIdx.x > 0 && s_mKeys == previousKey) {
- myValue += s_mScans[0];
+ my_value += s_mScans[0];
}
if (tid == size - 1) {
- sums[previousKey] = myValue + get(tid, vals, instIds);
+ sums[previousKey] = my_value + Get(tid, vals, instIds);
}
if ((previousKey != myKey) && (previousKey >= 0)) {
- sums[previousKey] = myValue;
- myValue = GradientPair(0.0f, 0.0f);
+ sums[previousKey] = my_value;
+ my_value = GradientPair(0.0f, 0.0f);
}
- scans[tid] = myValue;
+ scans[tid] = my_value;
}

/**
3 changes: 2 additions & 1 deletion src/tree/updater_gpu_hist.cu
@@ -278,7 +278,7 @@ struct DeviceHistogram {
/*! \brief Map nidx to starting index of its histogram. */
std::map<int, size_t> nidx_map;
thrust::device_vector<typename GradientSumT::ValueT> data;
- const size_t kStopGrowingSize = 1 << 26; // Do not grow beyond this size
+ static constexpr size_t kStopGrowingSize = 1 << 26; // Do not grow beyond this size
int n_bins;
int device_id_;

@@ -1373,6 +1373,7 @@ class GPUHistMakerSpecialised{
/*! List storing device id. */
std::vector<int> device_list_;

+ private:
DMatrix* p_last_fmat_;
GPUDistribution dist_;
};
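
Two notes on this file: `kStopGrowingSize` becomes `static constexpr`, so the cap is a single compile-time constant instead of a `const` field stored in every `DeviceHistogram`, and the trailing members of `GPUHistMakerSpecialised` gain an explicit `private:` section. A minimal sketch of the constant change, using a hypothetical `Histogram` type rather than the real one:

```cpp
#include <cstddef>
#include <vector>

// `static constexpr` keeps the growth cap as a compile-time constant shared
// by all instances, rather than a per-object const data member.
struct Histogram {
  static constexpr std::size_t kStopGrowingSize = std::size_t{1} << 26;

  std::vector<double> data;

  bool CanGrow(std::size_t extra_bins) const {
    return data.size() + extra_bins <= kStopGrowingSize;
  }
};
```
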
4 changes: 2 additions & 2 deletions tests/cpp/data/test_simple_dmatrix.cc
@@ -27,7 +27,7 @@ TEST(SimpleDMatrix, RowAccess) {
xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, false, false);

// Loop over the batches and count the records
- long row_count = 0;
+ int64_t row_count = 0;
for (auto &batch : dmat->GetRowBatches()) {
row_count += batch.Size();
}
@@ -54,7 +54,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
ASSERT_TRUE(dmat->SingleColBlock());

// Loop over the batches and assert the data is as expected
- long num_col_batch = 0;
+ int64_t num_col_batch = 0;
for (const auto &batch : dmat->GetSortedColumnBatches()) {
num_col_batch += 1;
EXPECT_EQ(batch.Size(), dmat->Info().num_col_)
4 changes: 2 additions & 2 deletions tests/cpp/predictor/test_cpu_predictor.cc
@@ -41,8 +41,8 @@ TEST(cpu_predictor, Test) {
// Test predict leaf
std::vector<float> leaf_out_predictions;
cpu_predictor->PredictLeaf((*dmat).get(), &leaf_out_predictions, model);
- for (int i = 0; i < leaf_out_predictions.size(); i++) {
- ASSERT_EQ(leaf_out_predictions[i], 0);
+ for (auto v : leaf_out_predictions) {
+ ASSERT_EQ(v, 0);
}

// Test predict contribution
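
The two test files above make small hygiene fixes: bare `long` counters become fixed-width `int64_t` (on LLP64 platforms such as 64-bit Windows, `long` is only 32 bits), and an `int`-indexed loop over a `std::vector<float>` becomes a range-based for, removing a signed/unsigned comparison against `size()`. A compact illustration with made-up data, not the actual test code:

```cpp
#include <cstdint>
#include <vector>

// Fixed-width counter instead of platform-dependent `long`, and range-based
// iteration instead of an int index compared against a size_t size().
int64_t CountZeroPredictions(const std::vector<float>& predictions) {
  int64_t zeros = 0;
  for (float v : predictions) {
    if (v == 0.0f) { ++zeros; }
  }
  return zeros;
}
```
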
10 changes: 5 additions & 5 deletions tests/cpp/tree/test_gpu_exact.cu
@@ -20,13 +20,13 @@ TEST(GPUExact, Update) {
auto* p_gpuexact_maker = TreeUpdater::Create("grow_gpu");
p_gpuexact_maker->Init(args);

- size_t constexpr n_rows = 4;
- size_t constexpr n_cols = 8;
+ size_t constexpr kNRows = 4;
+ size_t constexpr kNCols = 8;
bst_float constexpr sparsity = 0.0f;

- auto dmat = CreateDMatrix(n_rows, n_cols, sparsity, 3);
- std::vector<GradientPair> h_gpair(n_rows);
- for (size_t i = 0; i < n_rows; ++i) {
+ auto dmat = CreateDMatrix(kNRows, kNCols, sparsity, 3);
+ std::vector<GradientPair> h_gpair(kNRows);
+ for (size_t i = 0; i < kNRows; ++i) {
h_gpair[i] = GradientPair(i % 2, 1);
}
HostDeviceVector<GradientPair> gpair (h_gpair);