-
Notifications
You must be signed in to change notification settings - Fork 232
/
riak_kv.schema
1495 lines (1344 loc) · 56.5 KB
/
riak_kv.schema
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
%%-*- mode: erlang -*-
%% @doc How Riak will repair out-of-sync keys. Some features require
%% this to be set to 'active', including search.
%%
%% * active: out-of-sync keys will be repaired in the background
%% * passive: out-of-sync keys are only repaired on read
%% * active-debug: like active, but outputs verbose debugging
%% information
{mapping, "anti_entropy", "riak_kv.anti_entropy", [
{datatype, {enum, [active, passive, 'active-debug']}},
{default, active}
]}.
%% Translate the "anti_entropy" setting into the {State, Options} tuple
%% stored under riak_kv.anti_entropy.
{translation, "riak_kv.anti_entropy",
    fun(Conf) ->
        case cuttlefish:conf_get("anti_entropy", Conf) of
            passive ->
                {off, []};
            'active-debug' ->
                {on, [debug]};
            %% 'active' and any other value both leave AAE on with no options.
            _ActiveOrOther ->
                {on, []}
        end
    end}.
{mapping, "tictacaae_active", "riak_kv.tictacaae_active", [
{datatype, {enum, [active, passive]}},
{default, passive}
]}.
%% @doc Use hashtree tokens for anti-entropy throttling
%% To hold-up the vnode when there is a backlog of activity on the AAE store
%% hashtree token bucket may be used to block the vnode every 90 puts until
%% the PUT has been completed. This uses aae_ping with tictac_aae, and a full
%% sync block with legacy anti-entropy
{mapping, "aae_tokenbucket", "riak_kv.aae_tokenbucket", [
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc A path under which aae data files will be stored.
{mapping, "tictacaae_dataroot", "riak_kv.tictacaae_dataroot", [
{default, "$(platform_data_dir)/tictac_aae"},
{datatype, directory}
]}.
%% @doc A path under which the eraser overload queue will be stored.
{mapping, "eraser_dataroot", "riak_kv.eraser_dataroot", [
{default, "$(platform_data_dir)/kv_eraser"},
{datatype, directory}
]}.
%% @doc A path under which the reaper overload queue will be stored.
{mapping, "reaper_dataroot", "riak_kv.reaper_dataroot", [
{default, "$(platform_data_dir)/kv_reaper"},
{datatype, directory}
]}.
%% @doc A path under which the reader overload queue will be stored.
{mapping, "reader_dataroot", "riak_kv.reader_dataroot", [
{default, "$(platform_data_dir)/kv_reader"},
{datatype, directory}
]}.
%% @doc A path under which the repl real-time overload queue will be stored.
{mapping, "replrtq_dataroot", "riak_kv.replrtq_dataroot", [
{default, "$(platform_data_dir)/kv_replrtqsrc"},
{datatype, directory}
]}.
%% @doc The maximum size of the overflow queue for the eraser. If the queue
%% goes beyond this point, new additions will be discarded.
%% To update this at run-time, the configuration item can be changed via
%% remote_console, then `riak_kv_eraser:clear_queue()` can be called to clear
%% the existing queue and start a new queue with the new limit.
{mapping, "eraser_overflow_limit", "riak_kv.eraser_overflow_limit", [
{default, 10000000},
{datatype, integer}
]}.
%% @doc The maximum size of the overflow queue for the reaper. If the queue
%% goes beyond this point, new additions will be discarded.
%% To update this at run-time, the configuration item can be changed via
%% remote_console, then `riak_kv_reaper:clear_queue()` can be called to clear
%% the existing queue and start a new queue with the new limit.
{mapping, "reaper_overflow_limit", "riak_kv.reaper_overflow_limit", [
{default, 10000000},
{datatype, integer}
]}.
%% @doc The maximum size of the overflow queue for the real-time replication
%% queue (nextgen repl only). If the queue goes beyond this point, new
%% additions will be discarded.
%% For replrtq the queue is sized per QueueName (and priority), so differently
%% named queues on the host will each independently inherit this limit, for
%% each of the queue priorities.
%% To update this at run-time, the configuration item can be changed via
%% remote_console, then `riak_kv_replrtq_src:clear_rtq(QueueName)` can be
%% called to clear the existing queue and start a new queue with the new limit.
{mapping, "replrtq_overflow_limit", "riak_kv.replrtq_overflow_limit", [
{default, 10000000},
{datatype, integer}
]}.
%% @doc Parallel key store type
%% When running in parallel mode, which will be the default if the backend does
%% not support native tictac aae (i.e. is not leveled), what type of parallel
%% key store should be kept - leveled_ko (leveled and key-ordered), or
%% leveled_so (leveled and segment ordered).
%% When running in native mode, this setting is ignored
{mapping, "tictacaae_parallelstore", "riak_kv.tictacaae_parallelstore", [
{datatype, {enum, [leveled_ko, leveled_so]}},
{default, leveled_ko},
{commented, leveled_ko}
]}.
%% @doc Minimum Rebuild Wait
%% The minimum number of hours to wait between rebuilds. Default value is 2
%% weeks
{mapping, "tictacaae_rebuildwait", "riak_kv.tictacaae_rebuildwait", [
{datatype, integer},
{default, 336}
]}.
%% @doc Maximum Rebuild Delay
%% The number of seconds which represents the length of the period in which the
%% next rebuild will be scheduled. So if all vnodes are scheduled to rebuild
%% at the same time, they will actually rebuild randomly between 0 and this
%% value (in seconds) after the rebuild time. Default value is 4 days
{mapping, "tictacaae_rebuilddelay", "riak_kv.tictacaae_rebuilddelay", [
{datatype, integer},
{default, 345600}
]}.
%% @doc Store heads in parallel key stores
%% If running a parallel key store, the whole "head" object may be stored to
%% allow for fold_heads queries to be run against the parallel store.
%% Alternatively, the cost of the parallel key store can be reduced by storing
%% only a minimal data set necessary for AAE and monitoring
{mapping, "tictacaae_storeheads", "riak_kv.tictacaae_storeheads", [
{datatype, {flag, enabled, disabled}},
{default, disabled},
{commented, disabled}
]}.
%% @doc Frequency to prompt exchange per vnode
%% The number of milliseconds which the vnode must wait between self-pokes to
%% maybe prompt the next exchange. Default is 8 minutes - check all partitions
%% when n=3 once every hour (in each direction). A cycle of exchanges will
%% take (n - 1) * n + 1 exchange ticks for each nval.
%% Note if this is to be reduced further the riak_core vnode_inactivity_timeout
%% should also be reduced or handoffs may be blocked. To be safe the
%% vnode_inactivity_timeout must be < 0.5 * the tictacaae_exchangetick.
{mapping, "tictacaae_exchangetick", "riak_kv.tictacaae_exchangetick", [
{datatype, integer},
{default, 480000},
hidden
]}.
%% @doc Frequency to prompt rebuild check per vnode
%% The number of milliseconds which the vnode must wait between self-pokes to
%% maybe prompt the next rebuild. Default is 60 minutes.
%% When a node is being re-introduced to a cluster following a long delay, then
%% increase this tick prior to the reintroduction. This will reduce
%% the concurrency of some activity e.g. handoffs and rebuilds
{mapping, "tictacaae_rebuildtick", "riak_kv.tictacaae_rebuildtick", [
{datatype, integer},
{default, 3600000},
hidden
]}.
%% @doc Max number of leaf IDs per exchange
%% To control the length of time for each exchange, only a subset of the
%% conflicting leaves will be compared on each exchange. If there are issues
%% with query timeouts this may be halved. Large backlogs may be reduced
%% faster by doubling. There are 1M segments in a standard tree overall.
%% Performance tuning can also be made by adjusting the `tictacaae_repairloops`
%% and `tictacaae_rangeboost` - but `tictacaae_maxresults` is the simplest
%% factor that is likely to result in a relatively predictable (and linear)
%% outcome in terms of both CPU cost and repair speed.
{mapping, "tictacaae_maxresults", "riak_kv.tictacaae_maxresults", [
{datatype, integer},
{default, 64}
]}.
%% @doc Max number of repair loops per exchange
%% Each exchange will attempt a repair of tictacaae_maxresults, and will
%% analyse those repairs to see if there exists the potential for more repairs.
%% If there exists this potential, then repair loops will be run, but in these
%% repair loops a more efficient repair will be run:
%% - with a bucket, key_range or last_modified range (at least) to restrict the
%% scope of the fetch_clocks queries
%% - without rebuilding the segments in the AAE hash tree
%% - without checking for presence of the object in the journal
%% Problems related to invalid hash trees will need to be detected through the
%% initial loop of the exchange, not repair loops
{mapping, "tictacaae_repairloops", "riak_kv.tictacaae_repairloops", [
{datatype, integer},
{default, 4},
hidden
]}.
%% @doc Multiplier to the `tictacaae_maxresults` when following an initial AAE
%% exchange with a range-limited exchange.
%% After each exchange, where sufficient deltas are discovered there will be a
%% `tictacaae_repairloops` number of range-limited queries (assuming
%% sufficient results continue to be found). Each of these may have the
%% number of max results boosted by this integer factor.
%% For example, if `tictacaae_maxresults` is set to 64, and
%% `tictacaae_repairloops` is set to 4, and the `tictacaae_rangeboost` is set
%% to 2 - the initial loop will use `tictacaae_maxresults` of 64, but any
%% AAE exchanges on loops 1 to 4 will use 128.
%% Exchanges with range-limited queries are more efficient, and so more tree
%% segments can be fetched without creating significant CPU overheads, hence
%% the use of this boost to maxresults.
{mapping, "tictacaae_rangeboost", "riak_kv.tictacaae_rangeboost", [
{datatype, integer},
{default, 2},
hidden
]}.
%% @doc Exchange only between primary vnodes
%% Setting this to false allows Tictac AAE exchanges between both primary and
%% fallback vnodes.
{mapping, "tictacaae_primaryonly", "riak_kv.tictacaae_primaryonly", [
{datatype, flag},
{default, on},
hidden
]}.
%% @doc Pool Strategy - should a single node_worker_pool or multiple pools be
%% used for queueing potentially longer-running "background" queries
{mapping, "worker_pool_strategy", "riak_kv.worker_pool_strategy", [
{datatype, {enum, [none, single, dscp]}},
{default, dscp},
{commented, dscp}
]}.
%% @doc Pool Sizes - sizes for individual node_worker_pools
%% Only relevant if single or dscp strategy chosen. Set
%% `node_worker_pool_size` if a `single` pool strategy is being used, or set
%% `af1_worker_pool_size` to `af4_worker_pool_size` and `be_worker_pool_size`
%% if a multiple pool strategy has been chosen.
%% Separate assured forwarding pools, each sized by its own
%% `afN_worker_pool_size` setting, will be used for informational aae_folds
%% (find_keys, object_stats) and functional folds (merge_tree_range,
%% fetch_clock_range). The be_pool is used only for tictac AAE rebuilds at
%% present
{mapping, "node_worker_pool_size", "riak_kv.node_worker_pool_size", [
{datatype, integer},
{default, 4}
]}.
{mapping, "af1_worker_pool_size", "riak_kv.af1_worker_pool_size", [
{datatype, integer},
{default, 2}
]}.
{mapping, "af2_worker_pool_size", "riak_kv.af2_worker_pool_size", [
{datatype, integer},
{default, 1}
]}.
{mapping, "af3_worker_pool_size", "riak_kv.af3_worker_pool_size", [
{datatype, integer},
{default, 4}
]}.
{mapping, "af4_worker_pool_size", "riak_kv.af4_worker_pool_size", [
{datatype, integer},
{default, 1}
]}.
{mapping, "be_worker_pool_size", "riak_kv.be_worker_pool_size", [
{datatype, integer},
{default, 1}
]}.
%% @doc Backend PUT Pause (ms).
%% If the backend PUT has resulted in a pause request, then how long should
%% the vnode pause for? This is measured in ms, and currently only applies
%% to the leveled backend
{mapping, "backend_pause_ms", "riak_kv.backend_pause_ms", [
{datatype, integer},
{default, 10},
{commented, 10}
]}.
%% @doc Tombstone Pause (ms)
%% The pause has a dual-purpose. It slows both the riak_kv_reaper and
%% riak_kv_eraser process, as the pause is imposed on each delete or reap.
%% The pause also reduces the probability of a failure to reap when a non-keep
%% delete_mode is used, allowing for more time for the tombstone to propagate.
%% The pause is in milliseconds.
{mapping, "tombstone_pause", "riak_kv.tombstone_pause", [
{datatype, integer},
{default, 2},
{commented, 2}
]}.
%% @doc Whether to allow node to participate in coverage queries.
%% This is used as a manual switch to stop nodes in incomplete states
%% (E.g. doing a full partition repair, or node replace) from participating
%% in coverage queries, as their information may be incomplete (e.g. 2i
%% issues seen in these circumstances).
{mapping, "participate_in_coverage", "riak_core.participate_in_coverage", [
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc Specifies the storage engine used for Riak's key-value data
%% and secondary indexes (if supported).
{mapping, "storage_backend", "riak_kv.storage_backend", [
{default, {{storage_backend}} },
{datatype, {enum, [bitcask, leveldb, leveled, memory, multi, prefix_multi]}}
]}.
%% Translate the short "storage_backend" name into the backend module atom
%% stored under riak_kv.storage_backend. Names missing from the table fall
%% back to the bitcask backend module.
{translation, "riak_kv.storage_backend",
    fun(Conf) ->
        BackendModules = [
            {bitcask, riak_kv_bitcask_backend},
            {leveldb, riak_kv_eleveldb_backend},
            {leveled, riak_kv_leveled_backend},
            {memory, riak_kv_memory_backend},
            {multi, riak_kv_multi_backend},
            {prefix_multi, riak_kv_multi_prefix_backend}
        ],
        Chosen = cuttlefish:conf_get("storage_backend", Conf),
        proplists:get_value(Chosen, BackendModules, riak_kv_bitcask_backend)
    end}.
%% @doc Simplify prefix_multi configuration for Riak CS. Keep this
%% commented out unless Riak is configured for Riak CS.
{mapping, "cs_version", "riak_kv.riak_cs_version", [
{commented, 020000},
{datatype, integer},
{validators, ["verify_cs_backend"]}
]}.
%% Validator used by cs_version: accepts only integers of at least 20000,
%% and rejects every other value.
{validator, "verify_cs_backend", "must be later than CS 2.0.0",
    fun(Version) ->
        is_integer(Version) andalso Version >= 20000
    end}.
%% @doc Restrict how fast AAE can build hash trees. Building the tree
%% for a given partition requires a full scan over that partition's
%% data. Once built, trees stay built until they are expired.
%% * .number is the number of builds
%% * .per_timespan is the amount of time in which that .number of builds
%% occurs
%%
%% Default is 1 build per hour.
{mapping, "anti_entropy.tree.build_limit.number", "riak_kv.anti_entropy_build_limit", [
{default, 1},
{datatype, integer},
hidden
]}.
%% @see anti_entropy.tree.build_limit.number
{mapping, "anti_entropy.tree.build_limit.per_timespan", "riak_kv.anti_entropy_build_limit", [
{default, "1h"},
{datatype, {duration, ms}},
hidden
]}.
%% Combine the two build_limit settings into the single
%% {NumberOfBuilds, Timespan} tuple stored under
%% riak_kv.anti_entropy_build_limit.
{translation, "riak_kv.anti_entropy_build_limit",
    fun(Conf) ->
        Builds = cuttlefish:conf_get("anti_entropy.tree.build_limit.number", Conf),
        Timespan = cuttlefish:conf_get("anti_entropy.tree.build_limit.per_timespan", Conf),
        {Builds, Timespan}
    end}.
%% @doc Determine how often hash trees are expired after being built.
%% Periodically expiring a hash tree ensures the on-disk hash tree
%% data stays consistent with the actual k/v backend data. It also
%% helps Riak identify silent disk failures and bit rot. However,
%% expiration is not needed for normal AAE operation and should be
%% infrequent for performance reasons. The time is specified in
%% milliseconds.
{mapping, "anti_entropy.tree.expiry", "riak_kv.anti_entropy_expire", [
{default, "1w"},
{datatype, [{duration, ms}, {atom, never}]},
hidden
]}.
%% @doc Limit how many AAE exchanges or builds can happen concurrently.
{mapping, "anti_entropy.concurrency_limit", "riak_kv.anti_entropy_concurrency", [
{default, 2},
{datatype, integer},
hidden
]}.
%% @doc The tick determines how often the AAE manager looks for work
%% to do (building/expiring trees, triggering exchanges, etc).
%% The default is every 15 seconds. Lowering this value will
%% speed up the rate at which all replicas are synced across the cluster.
%% Increasing the value is not recommended.
{mapping, "anti_entropy.trigger_interval", "riak_kv.anti_entropy_tick", [
{default, "15s"},
{datatype, {duration, ms}},
hidden
]}.
%% @doc The directory where AAE hash trees are stored.
{mapping, "anti_entropy.data_dir", "riak_kv.anti_entropy_data_dir", [
{default, "$(platform_data_dir)/anti_entropy"},
hidden,
{datatype, directory}
]}.
%% @doc The LevelDB options used by AAE to generate the LevelDB-backed
%% on-disk hashtrees.
%% @see leveldb.write_buffer_size
{mapping, "anti_entropy.write_buffer_size", "riak_kv.anti_entropy_leveldb_opts.write_buffer_size", [
{default, "4MB"},
{datatype, bytesize},
hidden
]}.
{mapping, "anti_entropy.max_open_files", "riak_kv.anti_entropy_leveldb_opts.max_open_files", [
{default, 20},
{datatype, integer},
hidden
]}.
%% @doc Whether the distributed throttle for active anti-entropy is
%% enabled.
{mapping, "anti_entropy.throttle", "riak_kv.aae_throttle_enabled", [
{default, on},
{datatype, flag},
hidden
]}.
%% @doc Sets the throttling tiers for active anti-entropy. Each tier
%% is a minimum vnode mailbox size and a time-delay that the throttle
%% should observe at that size and above. For example:
%%
%% anti_entropy.throttle.tier1.mailbox_size = 0
%% anti_entropy.throttle.tier1.delay = 0ms
%% anti_entropy.throttle.tier2.mailbox_size = 40
%% anti_entropy.throttle.tier2.delay = 5ms
%%
%% If configured, there must be a tier which includes a mailbox size
%% of 0. Both .mailbox_size and .delay must be set for each tier.
%% @see anti_entropy.throttle
{mapping,
"anti_entropy.throttle.$tier.mailbox_size",
"riak_kv.aae_throttle_limits", [
{datatype, integer},
hidden,
{validators, ["non_negative"]}
]}.
%% @see anti_entropy.throttle.$tier.mailbox_size
{mapping,
"anti_entropy.throttle.$tier.delay",
"riak_kv.aae_throttle_limits", [
{datatype, {duration, ms}},
hidden
]}.
%% Validator: passes when the value compares greater than or equal to zero.
{validator, "non_negative", "must be greater than or equal to 0",
    fun(Candidate) ->
        0 =< Candidate
    end}.
%% Translation for riak_kv.aae_throttle_limits. Instead of an inline fun, the
%% translation fun is whatever riak_core_throttle:create_limits_translator_fun/2
%% returns for the "anti_entropy" prefix and the "mailbox_size" suffix.
%% NOTE(review): the helper is defined outside this file; presumably it gathers
%% the anti_entropy.throttle.$tier.mailbox_size / .delay pairs - confirm against
%% riak_core_throttle.
{translation,
"riak_kv.aae_throttle_limits",
riak_core_throttle:create_limits_translator_fun("anti_entropy", "mailbox_size")
}.
%% @see leveldb.bloomfilter
{mapping, "anti_entropy.bloomfilter", "riak_kv.anti_entropy_leveldb_opts.use_bloomfilter", [
{default, on},
{datatype, flag},
hidden
]}.
%% We left riak_kv.add_paths out on purpose.
%% @doc The maximum number of concurrent requests of each type (get or
%% put) that is allowed. Setting this value to infinite disables
%% overload protection. The 'erlang.process_limit' should be at least
%% 3 times more than this setting.
%% @see erlang.process_limit
{mapping, "max_concurrent_requests", "riak_kv.fsm_limit", [
{default, 50000},
{datatype, [integer, {atom, infinite}]},
hidden
]}.
%% Translate "max_concurrent_requests" into riak_kv.fsm_limit: an integer is
%% passed through unchanged, 'infinite' becomes 'undefined', and anything
%% else is reported as invalid.
{translation, "riak_kv.fsm_limit",
    fun(Conf) ->
        case cuttlefish:conf_get("max_concurrent_requests", Conf) of
            Limit when is_integer(Limit) ->
                Limit;
            infinite ->
                undefined;
            _Other ->
                cuttlefish:invalid("max_concurrent_requests must be an integer or 'infinite'")
        end
    end}.
%% @doc If forwarding to a replica-local coordinator on PUT fails,
%% this setting will retry the operation when set to 'on'.
%% * on = Riak 2.0 behavior (strongly recommended)
%% * off = Riak 1.x behavior
{mapping, "retry_put_coordinator_failure", "riak_kv.retry_put_coordinator_failure", [
{default, on},
{datatype, flag},
hidden
]}.
%% @doc Enable or disable mbox_check on PUTs
%% By default mbox_checks are performed so that vnodes with long queues
%% will not be used as put coordinators. To return to the pre-2.9 behaviour
%% of not checking, this can be disabled. The mbox_check may add additional
%% latency to PUTs in some environments.
{mapping, "mbox_check_enabled", "riak_kv.mbox_check_enabled", [
{default, on},
{datatype, flag},
hidden
]}.
%% @doc Controls which binary representation of a riak value is stored
%% on disk.
%% * 0: Original erlang:term_to_binary format. Higher space overhead.
%% * 1: New format for more compact storage of small values.
%% If using the leveled backend object_format 1 will always be used, when
%% persisting data into the backend - even if 0 has been configured here
{mapping, "object.format", "riak_kv.object_format", [
{default, 1},
{datatype, [{integer, 1}, {integer, 0}]}
]}.
%% Translate the numeric "object.format" setting into the version atom stored
%% under riak_kv.object_format (0 gives v0, 1 gives v1); any other value is
%% reported as invalid.
{translation, "riak_kv.object_format",
    fun(Conf) ->
        Format = cuttlefish:conf_get("object.format", Conf),
        if
            Format =:= 0 -> v0;
            Format =:= 1 -> v1;
            true -> cuttlefish:invalid("invalid object format version")
        end
    end}.
%% @doc Controls the size of the metadata cache for each vnode. Set to
%% 'off' to disable the cache. This shouldn't be necessary for on-disk
%% based backends, but can help performance in some cases (i.e. memory
%% backend, data fits in block cache, etc). Note that this is the size
%% of the ETS table, rather than the actual data, to keep the size
%% calculation simple, thus more space may be used than the simple
%% size * vnode_count calculation would imply.
%%
%% Caution: Do not use without extensive benchmarking.
{mapping, "metadata_cache_size", "riak_kv.vnode_md_cache_size", [
{datatype, [{atom, off}, bytesize]},
{default, off}, %% disabled by default, 256KB is a reasonable value
hidden
]}.
%% Translate "metadata_cache_size" into riak_kv.vnode_md_cache_size: the atom
%% 'off' becomes 0, any other value is passed through unchanged.
{translation, "riak_kv.vnode_md_cache_size",
    fun(Conf) ->
        CacheSize = cuttlefish:conf_get("metadata_cache_size", Conf),
        case CacheSize =:= off of
            true -> 0;
            false -> CacheSize
        end
    end}.
%%%% Memory backend section
%% @doc The maximum amount of memory consumed per vnode by the memory
%% storage backend. Minimum: 1MB
{mapping, "memory_backend.max_memory_per_vnode", "riak_kv.memory_backend.max_memory", [
{datatype, bytesize},
hidden
]}.
%% @see memory_backend.max_memory_per_vnode
{mapping, "multi_backend.$name.memory_backend.max_memory_per_vnode", "riak_kv.multi_backend", [
{datatype, bytesize},
hidden
]}.
%% Translate "memory_backend.max_memory_per_vnode" (a bytesize) into
%% riak_kv.memory_backend.max_memory by dividing by 1048576 (1024 * 1024) and
%% passing the quotient to cuttlefish_util:ceiling/1.
{translation, "riak_kv.memory_backend.max_memory",
    fun(Conf) ->
        BytesPerMegabyte = 1048576,
        MaxBytes = cuttlefish:conf_get("memory_backend.max_memory_per_vnode", Conf),
        cuttlefish_util:ceiling(MaxBytes / BytesPerMegabyte)
    end}.
%% @doc Each value written will be written with this "time to
%% live". Once that object's time is up, it will be deleted on the
%% next read of its key. Minimum: 1s
{mapping, "memory_backend.ttl", "riak_kv.memory_backend.ttl", [
{datatype, {duration, s}},
hidden
]}.
%% @see memory_backend.ttl
{mapping, "multi_backend.$name.memory_backend.ttl", "riak_kv.multi_backend", [
{datatype, {duration, s}},
hidden
]}.
%% @doc Measures were added to Riak 1.2 to counteract cross-site
%% scripting and request-forgery attacks. Some reverse-proxies cannot
%% remove the Referer header and make serving data directly from Riak
%% impossible. Turning secure_referer_check = off disables this
%% security check.
{mapping, "secure_referer_check", "riak_kv.secure_referer_check", [
{datatype, flag},
{default, on},
hidden
]}.
%% @doc a dummy value added to riak 2.2.5 to handle upgrades from
%% older riak_ee installs that have `jmx` in their riak.conf
{mapping, "jmx", "riak_kv.jmx_dummy", [
{datatype, flag},
{default, off},
hidden
]}.
%% @doc Reading or writing objects bigger than this size will write a
%% warning in the logs.
{mapping, "object.size.warning_threshold", "riak_kv.warn_object_size", [
{datatype, bytesize},
{default, "5MB"}
]}.
%% @doc Writing an object bigger than this will send a failure to the
%% client.
{mapping, "object.size.maximum", "riak_kv.max_object_size", [
{datatype, bytesize},
{default, "50MB"}
]}.
%% @doc Writing an object with more than this number of siblings will
%% generate a warning in the logs.
{mapping, "object.siblings.warning_threshold", "riak_kv.warn_siblings", [
{datatype, integer},
{default, 25}
]}.
%% @doc Writing an object with more than this number of siblings will
%% send a failure to the client.
{mapping, "object.siblings.maximum", "riak_kv.max_siblings", [
{datatype, integer},
{default, 100}
]}.
%% @doc The strategy used when merging objects that potentially have
%% conflicts.
%%
%% * 2: Riak 2.0 typed bucket default - reduces sibling creation through additional
%% metadata on each sibling (also known as dotted version vectors)
%% * 1: Riak 1.4, default buckets, and earlier default - may duplicate siblings
%% from interleaved writes (sibling explosion.)
{mapping, "buckets.default.merge_strategy", "riak_core.default_bucket_props.dvv_enabled", [
{default, '1'},
{datatype, {flag, '2', '1'}},
hidden
]}.
%% @doc The number of primary replicas (non-fallback) that must reply
%% to a read request.
{mapping, "buckets.default.pr", "riak_core.default_bucket_props.pr", [
{datatype, [integer, {enum, [quorum, all]}]},
{default, 0},
hidden
]}.
%% @doc The number of replicas which must reply to a read request.
{mapping, "buckets.default.r", "riak_core.default_bucket_props.r", [
{datatype, [{enum, [quorum, all]}, integer]},
{default, quorum},
hidden
]}.
%% @doc The number of replicas which must reply to a write request,
%% indicating that the write was received.
{mapping, "buckets.default.w", "riak_core.default_bucket_props.w", [
{datatype, [{enum, [quorum, all]}, integer]},
{default, quorum},
hidden
]}.
%% @doc The number of primary replicas (non-fallback) which must reply
%% to a write request.
{mapping, "buckets.default.pw", "riak_core.default_bucket_props.pw", [
{datatype, [integer, {enum, [quorum, all]}]},
{default, 0},
hidden
]}.
%% @doc The number of replicas which must reply to a write request,
%% indicating that the write was committed to durable storage.
{mapping, "buckets.default.dw", "riak_core.default_bucket_props.dw", [
{datatype, [{enum, [quorum, all]}, integer]},
{default, quorum},
hidden
]}.
%% @doc The number of replicas which must reply to a delete request.
{mapping, "buckets.default.rw", "riak_core.default_bucket_props.rw", [
{datatype, [{enum, [quorum, all]}, integer]},
{default, quorum},
hidden
]}.
%% @doc Whether not-founds will count toward a quorum of reads.
{mapping,
"buckets.default.notfound_ok",
"riak_core.default_bucket_props.notfound_ok", [
{default, true},
{datatype, {enum, [true, false]}},
hidden
]}.
%% @doc Whether not-founds will invoke the "basic quorum"
%% optimization. This setting will short-circuit fetches where the
%% majority of replicas report that the key is not found. Only used
%% when notfound_ok = false.
{mapping,
"buckets.default.basic_quorum",
"riak_core.default_bucket_props.basic_quorum", [
{default, false},
{datatype, {enum, [true, false]}},
hidden
]}.
%% @doc Whether or not siblings are allowed, by default, for untyped buckets.
%% Note: See Vector Clocks for a discussion of sibling resolution.
{mapping, "buckets.default.allow_mult", "riak_core.default_bucket_props.allow_mult", [
{datatype, {enum, [true, false]}},
{default, false},
hidden
]}.
%% @doc Whether conflicting writes resolve via timestamp.
{mapping,
"buckets.default.last_write_wins",
"riak_core.default_bucket_props.last_write_wins", [
{datatype, {enum, [true, false]}},
{default, false},
hidden
]}.
%% @doc A space delimited list of functions that will be run before a
%% value is stored, and that can abort the write. For Erlang
%% functions, use "module:function" and for JavaScript, use
%% "functionName".
{mapping, "buckets.default.precommit", "riak_core.default_bucket_props.precommit", [
hidden
]}.
%% Translate the space-delimited "buckets.default.precommit" string into a
%% list of hook structs. An unset value yields an empty list.
{translation, "riak_core.default_bucket_props.precommit",
    fun(Conf) ->
        %% Build one hook struct from a single whitespace-free hook name.
        ToHook =
            fun(Hook) ->
                case string:tokens(Hook, ":") of
                    %% Erlang hook "module:function" becomes
                    %% {struct, [{<<"mod">>, <<"module">>}, {<<"fun">>, <<"function">>}]}
                    [Module, Function] ->
                        {struct, [
                            {<<"mod">>, list_to_binary(Module)},
                            {<<"fun">>, list_to_binary(Function)}
                        ]};
                    %% JavaScript hook "functionName" becomes
                    %% {struct, [{<<"name">>, <<"functionName">>}]}
                    [JavascriptFunction] ->
                        {struct, [{<<"name">>, list_to_binary(JavascriptFunction)}]};
                    _ ->
                        cuttlefish:invalid("incorrect hook format '" ++ Hook ++ "'")
                end
            end,
        Configured = cuttlefish:conf_get("buckets.default.precommit", Conf, []),
        lists:map(ToHook, string:tokens(Configured, " "))
    end}.
%% @doc A space delimited list of functions that will be run after a
%% value is stored. Only Erlang functions are allowed, using the
%% "module:function" format.
{mapping, "buckets.default.postcommit", "riak_core.default_bucket_props.postcommit", [
hidden
]}.
%% Translate the space-delimited "buckets.default.postcommit" string into a
%% list of Erlang hook structs. An unset value yields an empty list; any
%% entry that is not exactly "module:function" is reported as invalid.
{translation, "riak_core.default_bucket_props.postcommit",
    fun(Conf) ->
        %% Build one hook struct from a single "module:function" entry.
        ToHook =
            fun(Hook) ->
                case string:tokens(Hook, ":") of
                    [Module, Function] ->
                        {struct, [
                            {<<"mod">>, list_to_binary(Module)},
                            {<<"fun">>, list_to_binary(Function)}
                        ]};
                    _ ->
                        cuttlefish:invalid("incorrect hook format '" ++ Hook ++ "'")
                end
            end,
        Configured = cuttlefish:conf_get("buckets.default.postcommit", Conf, []),
        lists:map(ToHook, string:tokens(Configured, " "))
    end}.
%% @doc Whether serialized datatypes will use compression, and at what
%% level. When an integer, this refers to the aggressiveness (and
%% slowness) of compression, on a scale from 0 to 9. 'on' is
%% equivalent to 6, 'off' is equivalent to 0. The default level is 1.
%% Values are checked by the "is_compression_value" validator.
{mapping, "datatypes.compression_level", "riak_dt.binary_compression", [
{datatype, [integer, flag]},
{default, 1},
{validators, ["is_compression_value"]},
hidden
]}.
%% Accepts a boolean or an integer in the inclusive range 0..9;
%% every other value is rejected.
{validator, "is_compression_value", "must be on/off or a value between 0 and 9",
  fun(Flag) when is_boolean(Flag) -> true;
     (Level) when is_integer(Level) -> Level >= 0 andalso Level =< 9;
     (_Other) -> false
  end}.
%% @doc Whether to use the background manager to limit KV handoff.
%% This will help to prevent system response degradation under times
%% of heavy load from multiple background tasks that contend for the
%% same resources. The default is off.
%% @see background_manager
{mapping, "handoff.use_background_manager", "riak_kv.handoff_use_background_manager", [
{datatype, flag},
{default, off},
hidden
]}.
%% @doc The maximum number of times that a secondary system like Riak
%% Search 2.0 can block handoff of primary key-value data. The
%% approximate maximum duration handoff of a vnode can be blocked for
%% can be determined by multiplying this number by the value of
%% "vnode_management_timer". To prevent handoff from ever being
%% blocked by a secondary system set this value to 0. The default
%% is 6.
%% @see vnode_management_timer
{mapping, "handoff.max_rejects", "riak_kv.handoff_rejected_max", [
  {datatype, integer},
  {default, 6},
  hidden
]}.
%% @doc Whether to use the background manager to limit AAE tree
%% rebuilds. This will help to prevent system response degradation
%% under times of heavy load from multiple background tasks that
%% contend for the same resources. The default is off.
%% @see background_manager
{mapping, "anti_entropy.use_background_manager", "riak_kv.aae_use_background_manager", [
{datatype, flag},
{default, off},
hidden
]}.
%% @doc Time in between the checks that trigger Bitcask merges.
%% Specified as a duration; the default is 3m.
{mapping, "bitcask.merge_check_interval", "riak_kv.bitcask_merge_check_interval", [
{default, "3m"},
{datatype, {duration, ms}},
hidden
]}.
%% @doc Jitter used to randomize the time in between the checks that trigger
%% Bitcask merges. Specified as a percentage; the default is 30%.
{mapping, "bitcask.merge_check_jitter", "riak_kv.bitcask_merge_check_jitter", [
{default, "30%"},
{datatype, {percent, float}},
hidden
]}.
%% @doc Maximum amount of data to merge in one go in the Bitcask backend.
%% Specified as a byte size; the default is 100GB.
{mapping, "bitcask.max_merge_size", "riak_kv.bitcask_max_merge_size", [
{default, "100GB"},
{datatype, bytesize},
hidden
]}.
%% @doc Whether to allow list buckets. Valid values are enabled and
%% disabled; the default is enabled.
{mapping, "cluster.job.riak_kv.list_buckets", "riak_core.job_accept_class", [
merge,
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc Whether to allow streaming list buckets. Valid values are
%% enabled and disabled; the default is enabled.
{mapping, "cluster.job.riak_kv.stream_list_buckets", "riak_core.job_accept_class", [
merge,
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc Whether to allow list keys. Valid values are enabled and
%% disabled; the default is enabled.
{mapping, "cluster.job.riak_kv.list_keys", "riak_core.job_accept_class", [
merge,
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc Whether to allow streaming list keys. Valid values are
%% enabled and disabled; the default is enabled.
{mapping, "cluster.job.riak_kv.stream_list_keys", "riak_core.job_accept_class", [
merge,
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc Whether to allow secondary index queries. Valid values are
%% enabled and disabled; the default is enabled.
{mapping, "cluster.job.riak_kv.secondary_index", "riak_core.job_accept_class", [
merge,
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc Whether to allow streaming secondary index queries. Valid
%% values are enabled and disabled; the default is enabled.
{mapping, "cluster.job.riak_kv.stream_secondary_index", "riak_core.job_accept_class", [
merge,
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc Whether to allow term-based map-reduce. Valid values are
%% enabled and disabled; the default is enabled.
{mapping, "cluster.job.riak_kv.map_reduce", "riak_core.job_accept_class", [
merge,
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc Whether to allow JavaScript map-reduce. Valid values are
%% enabled and disabled; the default is enabled.
{mapping, "cluster.job.riak_kv.map_reduce_js", "riak_core.job_accept_class", [
merge,
{datatype, {flag, enabled, disabled}},
{default, enabled},
{commented, enabled}
]}.
%% @doc For Tictac full-sync does all data need to be sync'd (all), or should
%% a specific bucket be sync'd (bucket), or a specific bucket type (type).
%% Note that in most cases sync of all data is lower overhead than sync of
%% a subset of data - as cached AAE trees will be used.
%% TODO: type is not yet implemented.
%% Valid values are all, bucket, type and disabled; the default is disabled.
{mapping, "ttaaefs_scope", "riak_kv.ttaaefs_scope", [
{datatype, {enum, [all, bucket, type, disabled]}},
{default, disabled}
]}.
%% @doc For tictac full-sync what registered queue name on this cluster should
%% be used for passing references to data which needs to be replicated for AAE
%% full-sync. This queue name must be defined as a
%% `riak_kv.replq<n>_queuename`, but need not be exclusive to full-sync (i.e. a
%% real-time replication queue may be used as well). The default is q1_ttaaefs.
{mapping, "ttaaefs_queuename", "riak_kv.ttaaefs_queuename", [
{datatype, atom},
{default, q1_ttaaefs}
]}.
%% @doc Tictac full-sync repair is by default uni-directional - each full-sync
%% operation managed from a given cluster will only attempt to prompt repair
%% where that cluster is in advance (i.e. the src of a more advanced value).
%% From Riak 3.0.10 it can be bi-directional, and by configuring the queuename
%% of the remote peer that this cluster is consuming from, repairs will be
%% prompted in both directions.
%% This change reduces the waste associated with discovery work that does not
%% lead to repair activity. To revert back to pre-3.0.10 uni-directional
%% behaviour use the keyword disabled as the peer queuename. The default is
%% disabled.
{mapping, "ttaaefs_queuename_peer", "riak_kv.ttaaefs_queuename_peer", [
{datatype, atom},
{default, disabled},
{commented, q1_ttaaefs}
]}.
%% @doc Tictac cluster slice number. Each cluster can be configured with a
%% slice number to space out full-sync events. The numbers can be 1..4. For
%% example when doing bi-directional replication give one cluster a slice
%% number of 1, and another a slice number of 3 (or alternatively use 2 and 4).
%% The default is 1.
{mapping, "ttaaefs_cluster_slice", "riak_kv.ttaaefs_cluster_slice", [
{datatype, integer},
{default, 1}
]}.
%% @doc For tictac full-sync what is the maximum number of AAE segments to be
%% compared per exchange. Reducing this will speed up clock compare queries,
%% but will increase the number of exchanges required to complete a repair.
%% If using range_check to speed-up repairs, this can be reduced as the
%% range_check maxresults will be boosted by the ttaaefs_rangeboost. When using
%% range_check a value of 64 is recommended, which may be reduced to 32 or 16
%% if the cluster has a very large volume of keys and/or limited capacity.
%% Only reduce below 16 in exceptional circumstances.
%% More capacity to process sync queries can be added by increasing the af2
%% and af3 queue sizes - but this will be at the risk of there being a bigger
%% impact on KV performance when repairs are required.