-
Notifications
You must be signed in to change notification settings - Fork 2
/
rw_hub.h
178 lines (150 loc) · 5.89 KB
/
rw_hub.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#ifndef __RW_HUB_H__
#define __RW_HUB_H__
#include "stat.h"
#include <boost/dynamic_bitset.hpp>
#include <boost/heap/fibonacci_heap.hpp>
#include <boost/format.hpp>
#include <util/graph_yche.h>
#include <util/sfmt_based_rand.h>
#include <util/sparse_matrix_utils.h> // the sparse hash map definition
#include "phf.h"
#include "BooPHF.h"
#include "minimal_perfect_hash.h"
using namespace boost;
using namespace boost::heap;
typedef GraphYche DirectedG;
struct greater
{
template<class T>
bool operator()(T const &a, T const &b) const { return a > b; }
};
struct sort_hub_pred { // sort the utility in decreasing order
bool operator()(const std::pair<size_t,double> &left, const std::pair<size_t,double> &right) {
return left.second > right.second;
}
};
struct Hub_Item{
NodePair np; // the hub of the graph node pairs
double utility;
NodePair position; // the cursor in the sorted utility array
Hub_Item(NodePair & np_, double utility_, NodePair & np2):np(np_), utility(utility_), position(np2){
}
Hub_Item(const Hub_Item & other){
np = other.np;
utility = other.utility;
position = other.position;
}
bool operator<(const Hub_Item& rhs) const{
return utility < rhs.utility;
}
};
inline Hub_Item utility_hub_converter(int a, int b, vector<pair<size_t, double>>& utility){
// produce the hub item based on the position a, b of utility array
NodePair np{utility[a].first, utility[b].first};
double u = utility[a].second * utility[b].second;
NodePair position{a,b};
return Hub_Item(np, u, position);
}
struct Rw_Hubs{
// the container for hubs
typedef boomphf::SingleHashFunctor<u_int64_t> hasher_t; // for BBHash
typedef boomphf::mphf<u_int64_t, hasher_t> boophf_t; // for BBHash
typedef sparse_hash_map<NodePair, vector<int>> Prefix_Sum; // the prefix sum of 1s of of this node pair, size: N * K
typedef sparse_hash_set<size_t> Single_Set; // just a set of integers for the right part of a hub
typedef sparse_hash_map<size_t, pair<boost::dynamic_bitset<> *, size_t>> Hub_Bit_Map; // key: the right part of a hub, value: bitmap, current iterator
typedef vector<pair<boost::dynamic_bitset<> *, size_t >> Second_Hub_Perfect_Bit; // the second leve of the index
typedef vector<pair<Second_Hub_Perfect_Bit, minimal_perfect_hash::MinimalPerfectHash<unsigned int> *>> First_Hub_Perfect_Bit; // the first level of the index
// hub index for random walks
int N; // #hubs
int l; // #samples for each hub
double c; // the decay factor
double lower_bound; // the lower bound utility of top k hubs
DirectedG * g_ptr;
vector<double> utility_array; // the utility array uility[i] is the utility of node i, used in pruning node pairs in bprw, index are node ids
SFMTRand rand_gen; // the random number generator
Rw_Hubs(DirectedG &g, int _N, int _l, double c_ ):N(_N), l(_l),c(c_), g_ptr(&g){
// init the vectors
hubs.resize(g.n); // there are n slots in hubs
this->utility_array.resize(g.n);
this->hub_bits.resize(g.n);
this->hub_perfect_bits.resize(g.n);
}
~Rw_Hubs(){
cout << "destrying perfect hash..." << endl;
cout << "exited Rw_Hubs" << endl;
}
int query_1s(const NodePair &np, int k); // query number 1s in the range of [0,k] of np
bool query_single_pair(const NodePair &np); // query a termination node for a single node pair
bool contains(NodePair &np);
void select_hubs(); // select N hubs for random walks
void sample_random_walks_for_hubs(); // samples random walks
void build_hubs(){// the composition function for building index
select_hubs();
// build_perfect_hash();
sample_random_walks_for_hubs();
}
// some statistic auxiliary variable
int number_of_contains_queries=0;
// index data
Prefix_Sum pre_sum;
vector<Single_Set> hubs;
vector<Hub_Bit_Map> hub_bits; // the bit map for hubs
First_Hub_Perfect_Bit hub_perfect_bits; // the second level of the index is by perfect hashing
void build_perfect_hash(); // build perfect hash for each second level index
};
inline unsigned int elegant_pair_f(NodePair & p){
// the elegant pair function for mapping a node pair to an interger
// assumption: p.first < p.second
if(p.first >= p.second){
return p.first * p.first + p.first + p.second;
}else{
return p.second * p.second + p.first;
}
}
inline unsigned int elegant_pair_f(unsigned int & a, unsigned int & b){
// the elegant pair function for mapping a node pair to an interger
// assumption: p.first < p.second
if( a >= b){
return a * a + a + b;
}else{
return b * b + a;
}
}
struct Distanct_1s{ // for querying samples
vector<size_t> next; // store the distance between 1: next[i]
size_t i = 0; // current cursor
size_t energy = 0;
int reset(){
i = 0;
energy = 0; // actually curssor is in the first 1
}
Distanct_1s(vector<size_t> & positions, size_t length_original_bitmap){
// positions: the position array of 1s
// construct the next array
int n = positions.size();
if(n > 0){ // the position matrix is not empty
next.resize(n);
for(int k = 0; k < n - 1; k++){
next[k] = positions[k+1] - positions[k];
}
next[n-1] = length_original_bitmap - (positions[n-1] - positions[0]);
reset(); // init the cursor
}
}
int get(){
//return 0/1 based on current energy and cursor
if(next.size() > 0){
bool result = 0;
if(energy == 0){ // we are at the position of 1
energy = next[i];
i = (i + 1) % next.size();
result = 1;
}
energy --;
return result;
}else{ // there no one
return 0;
}
}
};
#endif