forked from eBay/NuRaft
-
Notifications
You must be signed in to change notification settings - Fork 0
/
raft_params.hxx
479 lines (422 loc) · 13.2 KB
/
raft_params.hxx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
/************************************************************************
Modifications Copyright 2017-2019 eBay Inc.
Author/Developer(s): Jung-Sang Ahn
Original Copyright:
See URL: https://github.com/datatechnology/cornerstone
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
**************************************************************************/
#ifndef _RAFT_PARAMS_HXX_
#define _RAFT_PARAMS_HXX_
#include "basic_types.hxx"
#include "pp_util.hxx"
#include <algorithm>
namespace nuraft {
struct raft_params {
enum return_method_type {
/**
* `append_entries()` will be a blocking call,
* and will return after it is committed in leader node.
*/
blocking = 0x0,
/**
* `append_entries()` will return immediately,
* and callback function (i.e., handler) will be
* invoked after it is committed in leader node.
*/
async_handler = 0x1,
};
enum locking_method_type {
/**
* `append_entries()` will share the same mutex with
* background worker threads.
*/
single_mutex = 0x0,
/**
* `append_entries()` and background worker threads will
* use separate mutexes.
*/
dual_mutex = 0x1,
/**
* (Not supported yet)
* `append_entries()` will use RW-lock, which is separate to
* the mutex used by background worker threads.
*/
dual_rw_lock = 0x2,
};
raft_params()
: election_timeout_upper_bound_(500)
, election_timeout_lower_bound_(250)
, heart_beat_interval_(125)
, rpc_failure_backoff_(50)
, log_sync_batch_size_(1000)
, log_sync_stop_gap_(99999)
, snapshot_distance_(0)
, snapshot_block_size_(0)
, max_append_size_(100)
, reserved_log_items_(100000)
, client_req_timeout_(3000)
, fresh_log_gap_(200)
, stale_log_gap_(2000)
, custom_commit_quorum_size_(0)
, custom_election_quorum_size_(0)
, leadership_expiry_(0)
, allow_temporary_zero_priority_leader_(true)
, auto_forwarding_(false)
, use_bg_thread_for_urgent_commit_(true)
, exclude_snp_receiver_from_quorum_(false)
, auto_adjust_quorum_for_small_cluster_(false)
, locking_method_type_(dual_mutex)
, return_method_(blocking)
{}
/**
* Election timeout upper bound in milliseconds
*
* @param timeout
* @return self
*/
raft_params& with_election_timeout_upper(int32 timeout) {
election_timeout_upper_bound_ = timeout;
return *this;
}
/**
* Election timeout lower bound in milliseconds
*
* @param timeout
* @return self
*/
raft_params& with_election_timeout_lower(int32 timeout) {
election_timeout_lower_bound_ = timeout;
return *this;
}
/**
* heartbeat interval in milliseconds
*
* @param hb_interval
* @return self
*/
raft_params& with_hb_interval(int32 hb_interval) {
heart_beat_interval_ = hb_interval;
return *this;
}
/**
* Rpc failure backoff in milliseconds
*
* @param backoff
* @return self
*/
raft_params& with_rpc_failure_backoff(int32 backoff) {
rpc_failure_backoff_ = backoff;
return *this;
}
/**
* The maximum log entries could be attached to an appendEntries call
*
* @param size
* @return self
*/
raft_params& with_max_append_size(int32 size) {
max_append_size_ = size;
return *this;
}
/**
* For new member that just joined the cluster, we will use
* log sync to ask it to catch up, and this parameter is to
* specify how many log entries to pack for each sync request.
*
* @param batch_size
* @return self
*/
raft_params& with_log_sync_batch_size(int32 batch_size) {
log_sync_batch_size_ = batch_size;
return *this;
}
/**
* For new member that just joined the cluster, we will use
* log sync to ask it to catch up, and this parameter is to
* tell when to stop using log sync but appendEntries for the
* new server.
* When `leaderCommitIndex - indexCaughtUp < logSyncStopGap`,
* then appendEntries will be used.
*
* @param gap
* @return self
*/
raft_params& with_log_sync_stopping_gap(int32 gap) {
log_sync_stop_gap_ = gap;
return *this;
}
/**
* Enable log compact and snapshot with the commit distance
*
* @param commit_distance
* Log distance to compact between two snapshots.
* @return self
*/
raft_params& with_snapshot_enabled(int32 commit_distance) {
snapshot_distance_ = commit_distance;
return *this;
}
/**
* The TCP block size for syncing the snapshots.
*
* @param size
* @return self
*/
raft_params& with_snapshot_sync_block_size(int32 size) {
snapshot_block_size_ = size;
return *this;
}
/**
* The number of reserved log items when doing log compaction.
*
* @param number_of_logs Number of log items.
* @return self
*/
raft_params& with_reserved_log_items(int number_of_logs) {
reserved_log_items_ = number_of_logs;
return *this;
}
/**
* Timeout of the execution of client request (in ms).
*
* @param timeout
* @return self
*/
raft_params& with_client_req_timeout(int timeout) {
client_req_timeout_ = timeout;
return *this;
}
/**
* Enable auto-forwarding, so that non-leader node re-directs client
* request to the current leader.
*
* @param enable
* @return self
*/
raft_params& with_auto_forwarding(bool enable) {
auto_forwarding_ = enable;
return *this;
}
/**
* If this node is considered as stale and the gap between this node's committed
* log index and the leader's committed log index is smaller than this threshold,
* this node becomes fresh.
*
* @param new_gap New threshold.
* @return self
*/
raft_params& with_fresh_log_gap(int32 new_gap) {
fresh_log_gap_ = new_gap;
return *this;
}
/**
* If this node is considered as fresh and the gap between this node's committed
* log index and the leader's committed log index is larger than this threshold,
* this node becomes stale.
*
* @param new_gap New threshold.
* @return self
*/
raft_params& with_stale_log_gap(int32 new_gap) {
stale_log_gap_ = new_gap;
return *this;
}
/**
* If this is set to positive non-zero value, commiting
* a log will be based on this quorum size. Leader election
* will not be affected.
*
* If set to zero, the default quorum size will be used:
* `ceil{ (N+1) / 2 }`, where N is the number of nodes including
* the leader.
*
* If this is set to wrong value, Raft will use the default
* quorum size.
*
* @param new_size New custom commit quorum size.
* @return self
*/
raft_params& with_custom_commit_quorum_size(int32 new_size) {
custom_commit_quorum_size_ = new_size;
return *this;
}
/**
* If this is set to positive non-zero value, electing a
* new leader will be based on this quorum size. Committing
* a log will not be affected.
*
* If set to zero, the default quorum size will be used:
* `ceil{ (N+1) / 2 }`, where N is the number of nodes including
* the leader.
*
* If this is set to wrong value, Raft will use the default
* quorum size.
*
* @param new_size New custom election quorum size.
* @return self
*/
raft_params& with_custom_election_quorum_size(int32 new_size) {
custom_election_quorum_size_ = new_size;
return *this;
}
/**
* Set the expiration time of leadership.
*
* @param expiry_ms New leadership expiration in millisecond.
* @return self
*/
raft_params& with_leadership_expiry(int32 expiry_ms) {
leadership_expiry_ = expiry_ms;
return *this;
}
/**
* Return heartbeat interval.
* If given heartbeat interval is smaller than a specific value
* based on election timeout, return it instead.
*
* @return Heartbeat interval in millisecond.
*/
int max_hb_interval() const {
return std::max
( heart_beat_interval_,
election_timeout_lower_bound_ - (heart_beat_interval_ / 2) );
}
public:
/**
* Upper bound of election timer, in millisecond.
*/
int32 election_timeout_upper_bound_;
/**
* Lower bound of election timer, in millisecond.
*/
int32 election_timeout_lower_bound_;
/**
* Heartbeat interval, in millisecond.
*/
int32 heart_beat_interval_;
/**
* Backoff time when RPC failure happens, in millisecond.
*/
int32 rpc_failure_backoff_;
/**
* Max number of logs that can be packed in a RPC
* for catch-up of joining an empty node.
*/
int32 log_sync_batch_size_;
/**
* Log gap (the number of logs) to stop catch-up of
* joining a new node. Once this condition meets,
* that newly joined node is added to peer list
* and starts to receive heartbeat from leader.
*
* If zero, the new node will be added to the peer list
* immediately.
*/
int32 log_sync_stop_gap_;
/**
* Log gap (the number of logs) to create a Raft snapshot.
*/
int32 snapshot_distance_;
/**
* (Deprecated).
*/
int32 snapshot_block_size_;
/**
* Max number of logs that can be packed in a RPC
* for append entry request.
*/
int32 max_append_size_;
/**
* Minimum number of logs that will be preserved
* (i.e., protected from log compaction) since the
* last Raft snapshot.
*/
int32 reserved_log_items_;
/**
* Client request timeout in millisecond.
*/
int32 client_req_timeout_;
/**
* Log gap (compared to the leader's latest log)
* for treating this node as fresh.
*/
int32 fresh_log_gap_;
/**
* Log gap (compared to the leader's latest log)
* for treating this node as stale.
*/
int32 stale_log_gap_;
/**
* Custom quorum size for commit.
* If set to zero, the default quorum size will be used.
*/
int32 custom_commit_quorum_size_;
/**
* Custom quorum size for leader election.
* If set to zero, the default quorum size will be used.
*/
int32 custom_election_quorum_size_;
/**
* Expiration time of leadership in millisecond.
* If more than quorum nodes do not respond within
* this time, the current leader will immediately
* yield its leadership and become follower.
* If 0, it is automatically set to `heartbeat * 20`.
* If negative number, leadership will never be expired
* (the same as the original Raft logic).
*/
int32 leadership_expiry_;
/**
* If true, zero-priority member can initiate vote
* when leader is not elected long time (that can happen
* only the zero-priority member has the latest log).
* Once the zero-priority member becomes a leader,
* it will immediately yield leadership so that other
* higher priority node can takeover.
*/
bool allow_temporary_zero_priority_leader_;
/**
* If true, follower node will forward client request
* to the current leader.
* Otherwise, it will return error to client immediately.
*/
bool auto_forwarding_;
/**
* If true, creating replication (append_entries) requests will be
* done by a backgroudn thread, instead of doing it in user threads.
* There can be some delay a little bit, but it improves reducing
* the lock contention.
*/
bool use_bg_thread_for_urgent_commit_;
/**
* If true, a server who is currently receiving snapshot will not be
* counted in quorum. It is useful when there are only two servers
* in the cluster. Once the follower is receiving snapshot, the
* leader cannot make any progress.
*/
bool exclude_snp_receiver_from_quorum_;
/**
* If `true` and the size of the cluster is 2, the quorum size
* will be adjusted to 1 automatically, once one of two nodes
* becomes offline.
*/
bool auto_adjust_quorum_for_small_cluster_;
/**
* Choose the type of lock that will be used by user threads.
*/
locking_method_type locking_method_type_;
/**
* To choose blocking call or asynchronous call.
*/
return_method_type return_method_;
};
}
#endif //_RAFT_PARAMS_HXX_