Commit: address review feedback

rongou committed Apr 26, 2019
1 parent e3ca497, commit 82b3a52

Showing 4 changed files with 73 additions and 32 deletions.
2 changes: 1 addition & 1 deletion src/common/host_device_vector.cc

@@ -160,7 +160,7 @@ template <typename T>
 void HostDeviceVector<T>::Shard(GPUSet devices) const { }
 
 template <typename T>
-void Reshard(const GPUDistribution &distribution, const GPUDataPreservation preservation) { }
+void Reshard(const GPUDistribution &distribution, bool preserve_gpu_data) { }
 
 // explicit instantiations are required, as HostDeviceVector isn't header-only
 template class HostDeviceVector<bst_float>;
9 changes: 5 additions & 4 deletions src/common/host_device_vector.cu

@@ -370,13 +370,14 @@ struct HostDeviceVectorImpl {
     Shard(GPUDistribution::Block(new_devices));
   }
 
-  void Reshard(const GPUDistribution &distribution, const GPUDataPreservation preservation) {
+  void Reshard(const GPUDistribution &distribution, bool preserve_gpu_data) {
     if (distribution_ == distribution) { return; }
-    if (preservation == GPUDataPreservation::kPreserve) {
+    if (preserve_gpu_data) {
       LazySyncHost(GPUAccess::kWrite);
     }
     distribution_ = distribution;
     shards_.clear();
+    perm_h_.Grant(kWrite);
     InitShards();
   }
 
@@ -603,8 +604,8 @@ void HostDeviceVector<T>::Shard(const GPUDistribution &distribution) const {
 }
 
 template <typename T>
-void HostDeviceVector<T>::Reshard(const GPUDistribution &distribution, const GPUDataPreservation preservation) {
-  impl_->Reshard(distribution, preservation);
+void HostDeviceVector<T>::Reshard(const GPUDistribution &distribution, bool preserve_gpu_data) {
+  impl_->Reshard(distribution, preserve_gpu_data);
 }
 
 template <typename T>
7 changes: 1 addition & 6 deletions src/common/host_device_vector.h

@@ -193,11 +193,6 @@ inline GPUAccess operator-(GPUAccess a, GPUAccess b) {
   return static_cast<GPUAccess>(static_cast<int>(a) - static_cast<int>(b));
 }
 
-enum GPUDataPreservation {
-  kPreserve,
-  kDiscard
-};
-
 template <typename T>
 class HostDeviceVector {
  public:
@@ -264,7 +259,7 @@ class HostDeviceVector {
   /*!
    * \brief Change memory distribution.
    */
-  void Reshard(const GPUDistribution &distribution, const GPUDataPreservation preservation);
+  void Reshard(const GPUDistribution &distribution, bool preserve_gpu_data=true);
 
   void Resize(size_t new_size, T v = T());
 
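In short, the GPUDataPreservation enum is replaced by a bool defaulting to preserving GPU data. A minimal usage sketch of the revised public API (the vector setup here is hypothetical; the Reshard call patterns mirror the tests below):

  std::vector<int> h_vec(2345, 1);
  HostDeviceVector<int> vec(h_vec);

  vec.Shard(GPUSet::Range(0, 1));   // place the data on one GPU
  // Default (preserve_gpu_data == true): GPU-side writes are synced back
  // to the host before the vector is redistributed.
  vec.Reshard(GPUDistribution::Empty());

  vec.Shard(GPUSet::Range(0, 1));   // shard again
  // Explicit opt-out: GPU-side writes are dropped and the shards are
  // rebuilt from the host copy.
  vec.Reshard(GPUDistribution::Empty(), /*preserve_gpu_data=*/false);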
87 changes: 66 additions & 21 deletions tests/cpp/common/test_host_device_vector.cu

@@ -191,15 +191,15 @@ TEST(HostDeviceVector, Shard) {
   ASSERT_EQ(vec.Size(), h_vec.size());
   auto span = vec.DeviceSpan(0);  // sync to device
 
-  vec.Reshard(GPUDistribution::Empty(), GPUDataPreservation::kPreserve);  // pull back to cpu, empty devices.
+  vec.Reshard(GPUDistribution::Empty());  // pull back to cpu, empty devices.
   ASSERT_EQ(vec.Size(), h_vec.size());
   ASSERT_TRUE(vec.Devices().IsEmpty());
 
   auto h_vec_1 = vec.HostVector();
   ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
 }
 
-TEST(HostDeviceVector, Reshard_PreserveGPUData) {
+TEST(HostDeviceVector, Reshard) {
   std::vector<int> h_vec (2345);
   for (size_t i = 0; i < h_vec.size(); ++i) {
     h_vec[i] = i;
@@ -213,36 +213,30 @@ TEST(HostDeviceVector, Reshard_PreserveGPUData) {
   auto span = vec.DeviceSpan(0);  // sync to device
   PlusOne(&vec);
 
-  vec.Reshard(GPUDistribution::Empty(), GPUDataPreservation::kPreserve);
+  // GPU data is preserved.
+  vec.Reshard(GPUDistribution::Empty());
   ASSERT_EQ(vec.Size(), h_vec.size());
   ASSERT_TRUE(vec.Devices().IsEmpty());
 
   auto h_vec_1 = vec.HostVector();
   for (size_t i = 0; i < h_vec_1.size(); ++i) {
     ASSERT_EQ(h_vec_1.at(i), i + 1);
   }
-}
-
-TEST(HostDeviceVector, Reshard_DiscardGPUData) {
-  std::vector<int> h_vec (2345);
-  for (size_t i = 0; i < h_vec.size(); ++i) {
-    h_vec[i] = i;
-  }
-  HostDeviceVector<int> vec (h_vec);
-  auto devices = GPUSet::Range(0, 1);
-
-  vec.Shard(devices);
-  ASSERT_EQ(vec.DeviceSize(0), h_vec.size());
-  ASSERT_EQ(vec.Size(), h_vec.size());
-  auto span = vec.DeviceSpan(0);  // sync to device
+  span = vec.DeviceSpan(0);  // sync to device
   PlusOne(&vec);
 
-  vec.Reshard(GPUDistribution::Empty(), GPUDataPreservation::kDiscard);
+  // GPU data is discarded.
+  vec.Reshard(GPUDistribution::Empty(), /*preserve_gpu_data=*/false);
   ASSERT_EQ(vec.Size(), h_vec.size());
   ASSERT_TRUE(vec.Devices().IsEmpty());
 
-  auto h_vec_1 = vec.HostVector();
-  ASSERT_TRUE(std::equal(h_vec_1.cbegin(), h_vec_1.cend(), h_vec.cbegin()));
+  auto h_vec_2 = vec.HostVector();
+  for (size_t i = 0; i < h_vec_2.size(); ++i) {
+    // The second `PlusOne()` has no effect.
+    ASSERT_EQ(h_vec_2.at(i), i + 1);
+  }
 }
 
 TEST(HostDeviceVector, Span) {
@@ -289,7 +283,7 @@ TEST(HostDeviceVector, MGPU_Shard) {
     vec.Shard(GPUDistribution::Granular(devices, 12)));
 
   // All data is drawn back to CPU
-  vec.Reshard(GPUDistribution::Empty(), GPUDataPreservation::kPreserve);
+  vec.Reshard(GPUDistribution::Empty());
   ASSERT_TRUE(vec.Devices().IsEmpty());
   ASSERT_EQ(vec.Size(), h_vec.size());
 
@@ -301,17 +295,68 @@ TEST(HostDeviceVector, MGPU_Shard) {
   }
   ASSERT_EQ(total_size, h_vec.size());
   ASSERT_EQ(total_size, vec.Size());
+}
+
+TEST(HostDeviceVector, MGPU_Reshard) {
+  auto devices = GPUSet::AllVisible();
+  if (devices.Size() < 2) {
+    LOG(WARNING) << "Not testing in multi-gpu environment.";
+    return;
+  }
+
+  std::vector<int> h_vec (2345);
+  for (size_t i = 0; i < h_vec.size(); ++i) {
+    h_vec[i] = i;
+  }
+  HostDeviceVector<int> vec (h_vec);
+
+  // Data size for each device.
+  std::vector<size_t> devices_size (devices.Size());
+
+  // From CPU to GPUs.
+  vec.Shard(devices);
+  for (size_t i = 0; i < devices.Size(); ++i) {
+    auto span = vec.DeviceSpan(i);  // sync to device
+  }
+  PlusOne(&vec);
 
   // Reshard is allowed for already sharded vector.
-  vec.Reshard(GPUDistribution::Overlap(devices, 7), GPUDataPreservation::kPreserve);
-  total_size = 0;
+  vec.Reshard(GPUDistribution::Overlap(devices, 7));
+  size_t total_size = 0;
   for (size_t i = 0; i < devices.Size(); ++i) {
     total_size += vec.DeviceSize(i);
+    devices_size[i] = vec.DeviceSize(i);
   }
   size_t overlap = 7 * (devices.Size() - 1);
   ASSERT_EQ(total_size, h_vec.size() + overlap);
   ASSERT_EQ(total_size, vec.Size() + overlap);
+
+  auto h_vec_1 = vec.HostVector();
+  for (size_t i = 0; i < h_vec_1.size(); ++i) {
+    ASSERT_EQ(h_vec_1.at(i), i + 1);
+  }
+
+  for (size_t i = 0; i < devices.Size(); ++i) {
+    auto span = vec.DeviceSpan(i);  // sync to device
+  }
+  PlusOne(&vec);
+
+  // Reshard again, but discard the GPU data.
+  vec.Reshard(GPUDistribution::Overlap(devices, 11), /*preserve_gpu_data=*/false);
+  total_size = 0;
+  for (size_t i = 0; i < devices.Size(); ++i) {
+    total_size += vec.DeviceSize(i);
+    devices_size[i] = vec.DeviceSize(i);
+  }
+  overlap = 11 * (devices.Size() - 1);
+  ASSERT_EQ(total_size, h_vec.size() + overlap);
+  ASSERT_EQ(total_size, vec.Size() + overlap);
+
+  auto h_vec_2 = vec.HostVector();
+  for (size_t i = 0; i < h_vec_2.size(); ++i) {
+    // The second `PlusOne()` has no effect.
+    ASSERT_EQ(h_vec_2.at(i), i + 1);
+  }
 }
 #endif
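A note on the overlap arithmetic the new MGPU_Reshard test relies on: with GPUDistribution::Overlap(devices, k), adjacent device shards appear to share k boundary elements, so the per-device sizes sum to k * (devices.Size() - 1) more than the logical size. A worked check under an assumed 2-GPU setup (values are illustrative, not from the commit):

  // Assuming devices.Size() == 2 and h_vec.size() == 2345:
  size_t overlap7  = 7 * (2 - 1);   // = 7;  total_size == 2345 + 7  == 2352
  size_t overlap11 = 11 * (2 - 1);  // = 11; total_size == 2345 + 11 == 2356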
