-
Notifications
You must be signed in to change notification settings - Fork 1
/
learnedindex.h
217 lines (173 loc) · 8.8 KB
/
learnedindex.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#ifndef LEARNED_INDEX_HEADER
#define LEARNED_INDEX_HEADER
#include <atomic>
#include "include/rs/builder.h"
#include "include/rs/builderwithoutminmax.h"
#include "include/rs/radix_spline.h"
template <class KeyType, class ValueType>
class LearnedIndex{
public:
// constructor1 - you fill the builder from a vector of pairs
LearnedIndex(std::vector<std::pair<KeyType, ValueType>> * k, size_t num_radix_bits = 18, size_t max_error = 32);
// constructor2 - you fill the builder from an array of pairs
LearnedIndex(std::pair<KeyType, ValueType> * kv_array, size_t num, size_t num_radix_bits = 18, size_t max_error = 32);
// constructor3 - builder is already filled for you
LearnedIndex(std::vector<std::pair<KeyType, ValueType>> * k, rsplus::BuilderWithoutMinMax<KeyType> & rsb);
// destructor
~LearnedIndex();
inline std::size_t length();
inline bool lookup(const KeyType &lookup_key, int &offset); // get offset of ">=" key
inline bool find(const KeyType &lookup_key, int &offset, bool &deleted_flag); // get offset of "==" key
inline bool find(const KeyType &lookup_key, int &offset, ValueType &val, bool &deleted_flag); // get offset of "==" key and associated value
inline bool update(const int &position, const ValueType &val);
inline bool remove(const int &position);
KeyType min_key, max_key;
inline bool empty() const;
inline typename std::vector<std::pair<KeyType, ValueType>>::iterator begin() const;
inline typename std::vector<std::pair<KeyType, ValueType>>::iterator end() const;
inline bool get_is_removed(typename std::vector<std::pair<KeyType, ValueType>>::iterator & iter) const;
inline long long memory_consumption();
private:
std::vector<std::pair<KeyType, ValueType>> * kv_data; // The key-value store over which the active_learned_index approximates.
bool * is_removed; // We use vector<bool> instead of a triplet because of the space-efficient implementation
rsplus::RadixSpline<KeyType> rspline;
};
template <class KeyType, class ValueType>
LearnedIndex<KeyType, ValueType>::LearnedIndex(std::vector<std::pair<KeyType, ValueType>> * k, size_t num_radix_bits, size_t max_error){
// Keys should be pointing to the initial data
kv_data = k;
// Initialize is_removed so that no vector is removed initially
is_removed = new bool[kv_data->size()];
if(!(*k).empty()){
// Extract minimum and maximum value of the data you want to approximate with the spline
min_key = kv_data->front().first;
max_key = kv_data->back().first;
}
else{
min_key = std::numeric_limits<KeyType>::min();
max_key = std::numeric_limits<KeyType>::min();
}
// Construct the spline in a single pass by iterating over the keys
rsplus::Builder<KeyType> rsb(min_key, max_key, num_radix_bits, max_error);
for (const auto& kv_pair : *k) rsb.AddKey(kv_pair.first);
rspline = rsb.Finalize();
}
template <class KeyType, class ValueType>
LearnedIndex<KeyType, ValueType>::LearnedIndex(std::pair<KeyType, ValueType> * kv_array, size_t num, size_t num_radix_bits, size_t max_error){
// Create empty data vector and reserve space
kv_data = new std::vector<std::pair<KeyType, ValueType>>;
kv_data->reserve(num);
// Initialize is_removed so that no vector is removed initially
is_removed = new bool[num];
if(num > 0){
// Extract minimum and maximum value of the data you want to approximate with the spline
min_key = kv_array[0].first;
max_key = kv_array[num-1].first;
}
else{
min_key = std::numeric_limits<KeyType>::min();
max_key = std::numeric_limits<KeyType>::min();
}
// Construct the spline in a single pass by iterating over the keys
rsplus::Builder<KeyType> rsb(min_key, max_key, num_radix_bits, max_error);
for (int i = 0; i < num; i++) {
kv_data->push_back(kv_array[i]);
rsb.AddKey(kv_array[i].first);
}
rspline = rsb.Finalize();
}
template <class KeyType, class ValueType>
LearnedIndex<KeyType, ValueType>::LearnedIndex(std::vector<std::pair<KeyType, ValueType>> * k, rsplus::BuilderWithoutMinMax<KeyType> & rsb){
// Keys should be pointing to the initial data
kv_data = k;
// Initialize is_removed so that no vector is removed initially
is_removed = new bool[kv_data->size()];
// Construct spline by finalizing the builder that was passed as a parameter
// Nothing changes if the builder is empty
rspline = rsb.Finalize();
}
template <class KeyType, class ValueType>
LearnedIndex<KeyType, ValueType>::~LearnedIndex(){
delete kv_data;
delete is_removed;
}
template <class KeyType, class ValueType>
inline std::size_t LearnedIndex<KeyType, ValueType>::length(){
return kv_data->size();
}
template <class KeyType, class ValueType>
inline bool LearnedIndex<KeyType, ValueType>::lookup(const KeyType &lookup_key, int &offset){
// Finds the next smallest number in keys just greater than or equal to that number and stores it in offset
// Returns false if such number does not exist, true otherwise
// Note: offset will be out-of-bounds for the keys vector when the function returns false
// Search bound for local search using RadixSpline
rsplus::SearchBound bound = rspline.GetSearchBound(lookup_key);
// Perform binary search inside the error bounds to find the exact position
auto start = std::begin(*kv_data) + bound.begin, last = std::begin(*kv_data) + bound.end;
auto binary_search_offset = std::lower_bound(start, last, lookup_key,
[](const std::pair<KeyType, ValueType>& lhs, const KeyType& rhs){
return lhs.first < rhs;
});
offset = binary_search_offset - std::begin(*kv_data);
// Return true iff records greater than or equal to the given key exist in the data
return (offset < kv_data->size());
}
template <class KeyType, class ValueType>
inline bool LearnedIndex<KeyType, ValueType>::find(const KeyType &lookup_key, int &offset, bool &deleted_flag){
// Finds the exact key, if it exists. Returns true if the key exists, false otherwise.
// Uses lookup() and stores the smallest key that is greater
// Note: offset could be out-of-bounds for the keys vector when the function returns false
bool keys_greater_or_equal_exist = lookup(lookup_key, offset);
if(keys_greater_or_equal_exist && (*kv_data)[offset].first == lookup_key){
deleted_flag = is_removed[offset];
return true;
}
else return false;
}
template <class KeyType, class ValueType>
inline bool LearnedIndex<KeyType, ValueType>::find(const KeyType &lookup_key, int &offset, ValueType &val, bool &deleted_flag){
// Finds the exact key, if it exists. Returns true if the key exists, false otherwise.
// Uses lookup() and stores the smallest key that is greater
// Note: This implementation also returns the value associated with the given key
// Note: offset could be out-of-bounds for the keys vector when the function returns false
bool keys_greater_or_equal_exist = lookup(lookup_key, offset);
if(keys_greater_or_equal_exist && (*kv_data)[offset].first == lookup_key){
val = (*kv_data)[offset].second;
deleted_flag = is_removed[offset];
return true;
}
else return false;
}
template <class KeyType, class ValueType>
inline bool LearnedIndex<KeyType, ValueType>::update(const int &position, const ValueType &val){
(*kv_data)[position].second = val;
return !is_removed[position]; // if the kv pair is removed, then return false
}
template <class KeyType, class ValueType>
inline bool LearnedIndex<KeyType, ValueType>::remove(const int &position){
bool res = !is_removed[position]; // we want to return false if we are asked to remove an already removed record
is_removed[position] = true;
return res;
}
template <class KeyType, class ValueType>
inline bool LearnedIndex<KeyType, ValueType>::empty() const{
return kv_data->empty();
}
template <class KeyType, class ValueType>
inline typename std::vector<std::pair<KeyType, ValueType>>::iterator LearnedIndex<KeyType, ValueType>::begin() const{
return kv_data->begin();
}
template <class KeyType, class ValueType>
inline typename std::vector<std::pair<KeyType, ValueType>>::iterator LearnedIndex<KeyType, ValueType>::end() const{
return kv_data->end();
}
template <class KeyType, class ValueType>
inline bool LearnedIndex<KeyType, ValueType>::get_is_removed(typename std::vector<std::pair<KeyType, ValueType>>::iterator & iter) const{
size_t offset = iter - kv_data->begin(); // the iter is from kv_data vector -> decrease the begin() iter of this vector to get a valid offset
return is_removed[offset];
}
template <class KeyType, class ValueType>
inline long long LearnedIndex<KeyType, ValueType>::memory_consumption() {
return rspline.GetSize();
}
#endif