
split graph table #3

Status: Merged (41 commits, merged May 25, 2022)

Commits (41)
3faa1d8
enable graph-engine to return all id
seemingwang Apr 27, 2022
3a70c42
change vector's dimension
seemingwang Apr 28, 2022
09455a6
change vector's dimension
seemingwang Apr 28, 2022
acb8ac0
enlarge returned ids dimensions
seemingwang Apr 28, 2022
ff5fa32
add actual_val
DesmonDay Apr 29, 2022
7950086
change vlog
DesmonDay Apr 29, 2022
24cb259
fix bug
DesmonDay Apr 29, 2022
7798771
bug fix
DesmonDay Apr 29, 2022
641fcac
bug fix
DesmonDay Apr 29, 2022
7762561
fix display test
DesmonDay Apr 29, 2022
7cfe661
Merge pull request #26 from DesmonDay/gpu_graph_engine2
seemingwang May 1, 2022
918bb56
singleton of gpu_graph_wrapper
seemingwang May 1, 2022
2e44d09
Merge branch 'gpu_graph_engine2' of https://github.com/seemingwang/Pa…
seemingwang May 1, 2022
b63e2e2
change sample result's structure to fit training
seemingwang May 1, 2022
cfe8d4a
recover sample code
seemingwang May 1, 2022
0d18f57
fix
seemingwang May 1, 2022
0ad3e46
secondary sample
seemingwang May 1, 2022
2606895
add graph partition
seemingwang May 4, 2022
3cfac05
add graph partition
seemingwang May 4, 2022
38f7b15
fix pybind
seemingwang May 4, 2022
d1a74f2
optimize buffer allocation
seemingwang May 10, 2022
2e2dd2a
resolve conflicts
seemingwang May 10, 2022
3c33403
fix node transfer problem
seemingwang May 11, 2022
080e5c9
remove log
seemingwang May 11, 2022
b00a116
support 32G+ graph on single gpu
seemingwang May 12, 2022
97c3f0c
remove logs
seemingwang May 12, 2022
aaa137e
fix
seemingwang May 12, 2022
08a301f
fix
seemingwang May 12, 2022
12168b0
fix cpu query
seemingwang May 12, 2022
ec89107
display info
seemingwang May 14, 2022
c221167
remove log
seemingwang May 15, 2022
ab005da
remove empyt file
seemingwang May 15, 2022
c805cc0
distribute labeled data evenly in graph engine
seemingwang May 18, 2022
32a4341
merge
seemingwang May 18, 2022
8b0b194
split graph_table
seemingwang May 24, 2022
91f5c32
reuse clear graph
seemingwang May 24, 2022
6672b82
optimize vector allocation
seemingwang May 24, 2022
112cf61
fix
seemingwang May 24, 2022
dc8dbf2
rename variables
seemingwang May 25, 2022
f43d085
remove log
seemingwang May 25, 2022
a9b5445
merge
seemingwang May 25, 2022
18 changes: 11 additions & 7 deletions paddle/fluid/framework/fleet/heter_ps/gpu_graph_node.h
@@ -123,21 +123,25 @@ node_list[8]-> node_id:17, neighbor_size:1, neighbor_offset:15
  */
 struct NeighborSampleQuery {
   int gpu_id;
-  int64_t *key;
-  int sample_size;
+  int table_idx;
+  int64_t *src_nodes;
   int len;
Review comment (repository owner), on the int len field: shouldn't len be placed immediately after src_nodes? That would make it clearer that it is the length of src_nodes.

-  void initialize(int gpu_id, int64_t key, int sample_size, int len) {
+  int sample_size;
+  void initialize(int gpu_id, int table_idx, int64_t src_nodes, int sample_size,
+                  int len) {
+    this->table_idx = table_idx;
     this->gpu_id = gpu_id;
-    this->key = (int64_t *)key;
+    this->src_nodes = (int64_t *)src_nodes;
     this->sample_size = sample_size;
     this->len = len;
   }
   void display() {
     int64_t *sample_keys = new int64_t[len];
     VLOG(0) << "device_id " << gpu_id << " sample_size = " << sample_size;
-    VLOG(0) << "there are " << len << " keys ";
+    VLOG(0) << "there are " << len << " keys to sample for graph " << table_idx;
     std::string key_str;
-    cudaMemcpy(sample_keys, key, len * sizeof(int64_t), cudaMemcpyDeviceToHost);
+    cudaMemcpy(sample_keys, src_nodes, len * sizeof(int64_t),
+               cudaMemcpyDeviceToHost);

     for (int i = 0; i < len; i++) {
       if (key_str.size() > 0) key_str += ";";
@@ -212,7 +216,7 @@ struct NeighborSampleResult {
     std::vector<int64_t> graph;
     int64_t *sample_keys = new int64_t[q.len];
     std::string key_str;
-    cudaMemcpy(sample_keys, q.key, q.len * sizeof(int64_t),
+    cudaMemcpy(sample_keys, q.src_nodes, q.len * sizeof(int64_t),
               cudaMemcpyDeviceToHost);
     int64_t *res = new int64_t[sample_size * key_size];
     cudaMemcpy(res, val, sample_size * key_size * sizeof(int64_t),
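For reference, here is a minimal usage sketch of the reworked NeighborSampleQuery (not part of this PR). The helper name and the edge_table_idx/d_src_nodes/num_nodes parameters are hypothetical, and d_src_nodes is assumed to already hold num_nodes node ids on the device selected by gpu_id:

// Illustrative only: fill the query and run a v3 neighbor sample.
void sample_neighbors(GpuPsGraphTable *table, int gpu_id, int edge_table_idx,
                      int64_t *d_src_nodes, int num_nodes, int sample_size) {
  NeighborSampleQuery q;
  // src_nodes travels as an int64_t value and is cast back to a pointer
  // inside initialize(), matching the (int64_t *)src_nodes cast in the diff.
  q.initialize(gpu_id, edge_table_idx,
               reinterpret_cast<int64_t>(d_src_nodes), sample_size, num_nodes);
  q.display();  // copies the keys back to the host and logs them via VLOG(0)
  NeighborSampleResult r =
      table->graph_neighbor_sample_v3(q, /*cpu_switch=*/false);
}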
49 changes: 34 additions & 15 deletions paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h
@@ -23,19 +23,35 @@
 #ifdef PADDLE_WITH_HETERPS
 namespace paddle {
 namespace framework {
+enum GraphTableType { EDGE_TABLE, FEATURE_TABLE };
 class GpuPsGraphTable : public HeterComm<uint64_t, int64_t, int> {
  public:
-  GpuPsGraphTable(std::shared_ptr<HeterPsResource> resource, int topo_aware)
+  int get_table_offset(int gpu_id, GraphTableType type, int idx) {
+    int type_id = type;
+    return gpu_id * (graph_table_num_ + feature_table_num_) +
+           type_id * graph_table_num_ + idx;
+  }
+  GpuPsGraphTable(std::shared_ptr<HeterPsResource> resource, int topo_aware,
+                  int graph_table_num, int feature_table_num)
       : HeterComm<uint64_t, int64_t, int>(1, resource) {
     load_factor_ = 0.25;
     rw_lock.reset(new pthread_rwlock_t());
+    this->graph_table_num_ = graph_table_num;
+    this->feature_table_num_ = feature_table_num;
     gpu_num = resource_->total_device();
     memset(global_device_map, -1, sizeof(global_device_map));
+    for (auto &table : tables_) {
+      delete table;
+      table = NULL;
+    }
+    tables_ = std::vector<Table *>(
+        gpu_num * (graph_table_num + feature_table_num), NULL);
+    sample_status = std::vector<int *>(gpu_num * graph_table_num, NULL);
     for (int i = 0; i < gpu_num; i++) {
-      gpu_graph_list.push_back(GpuPsCommGraph());
       global_device_map[resource_->dev_id(i)] = i;
-      sample_status.push_back(NULL);
-      tables_.push_back(NULL);
+      for (int j = 0; j < graph_table_num; j++) {
+        gpu_graph_list_.push_back(GpuPsCommGraph());
+      }
     }
     cpu_table_status = -1;
     if (topo_aware) {
@@ -89,21 +105,23 @@ class GpuPsGraphTable : public HeterComm<uint64_t, int64_t, int> {
     // end_graph_sampling();
     // }
   }
-  void build_graph_on_single_gpu(GpuPsCommGraph &g, int gpu_id);
-  void clear_graph_info(int gpu_id);
-  void build_graph_from_cpu(std::vector<GpuPsCommGraph> &cpu_node_list);
+  void build_graph_on_single_gpu(GpuPsCommGraph &g, int gpu_id, int idx);
+  void clear_graph_info(int gpu_id, int index);
+  void clear_graph_info(int index);
+  void build_graph_from_cpu(std::vector<GpuPsCommGraph> &cpu_node_list,
+                            int idx);
   NodeQueryResult graph_node_sample(int gpu_id, int sample_size);
   NeighborSampleResult graph_neighbor_sample_v3(NeighborSampleQuery q,
                                                 bool cpu_switch);
-  NeighborSampleResult graph_neighbor_sample(int gpu_id, int64_t *key,
+  NeighborSampleResult graph_neighbor_sample(int gpu_id, int idx, int64_t *key,
                                              int sample_size, int len);
-  NeighborSampleResult graph_neighbor_sample_v2(int gpu_id, int64_t *key,
-                                                int sample_size, int len,
-                                                bool cpu_query_switch);
+  NeighborSampleResult graph_neighbor_sample_v2(int gpu_id, int idx,
+                                                int64_t *key, int sample_size,
+                                                int len, bool cpu_query_switch);
   void init_sample_status();
   void free_sample_status();
-  NodeQueryResult query_node_list(int gpu_id, int start, int query_size);
-  void clear_graph_info();
+  NodeQueryResult query_node_list(int gpu_id, int idx, int start,
+                                  int query_size);
   void display_sample_res(void *key, void *val, int len, int sample_len);
   void move_neighbor_sample_result_to_source_gpu(int gpu_id, int gpu_num,
                                                  int sample_size, int *h_left,
@@ -112,12 +130,13 @@ class GpuPsGraphTable : public HeterComm<uint64_t, int64_t, int> {
                                                  int *actual_sample_size);
   int init_cpu_table(const paddle::distributed::GraphParameter &graph);
   int gpu_num;
-  std::vector<GpuPsCommGraph> gpu_graph_list;
+  int graph_table_num_, feature_table_num_;
+  std::vector<GpuPsCommGraph> gpu_graph_list_;
   int global_device_map[32];
   std::vector<int *> sample_status;
   const int parallel_sample_size = 1;
   const int dim_y = 256;
-  std::shared_ptr<paddle::distributed::GraphTable> cpu_graph_table;
+  std::shared_ptr<paddle::distributed::GraphTable> cpu_graph_table_;
   std::shared_ptr<pthread_rwlock_t> rw_lock;
   mutable std::mutex mutex_;
   std::condition_variable cv_;
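To make the new per-GPU table layout concrete, here is a small standalone sketch (not part of the PR) of the arithmetic get_table_offset() performs. It assumes, purely for illustration, 2 edge tables and 1 feature table per GPU, and takes the counts as parameters, whereas in the class they are members. Each GPU owns one contiguous block of graph_table_num + feature_table_num tables, edge tables first, feature tables after them.

#include <cstdio>

enum GraphTableType { EDGE_TABLE, FEATURE_TABLE };

// Mirrors the offset formula from the diff, with the table counts passed in
// so the example compiles on its own.
int get_table_offset(int gpu_id, GraphTableType type, int idx,
                     int graph_table_num, int feature_table_num) {
  int type_id = type;  // EDGE_TABLE -> 0, FEATURE_TABLE -> 1
  return gpu_id * (graph_table_num + feature_table_num) +
         type_id * graph_table_num + idx;
}

int main() {
  const int graph_table_num = 2, feature_table_num = 1;
  // GPU 0 owns slots 0..2, GPU 1 owns slots 3..5, and so on.
  std::printf("%d\n", get_table_offset(0, EDGE_TABLE, 1, graph_table_num,
                                       feature_table_num));  // prints 1
  std::printf("%d\n", get_table_offset(0, FEATURE_TABLE, 0, graph_table_num,
                                       feature_table_num));  // prints 2
  std::printf("%d\n", get_table_offset(1, EDGE_TABLE, 0, graph_table_num,
                                       feature_table_num));  // prints 3
  return 0;
}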