priv/riak_kv.schema

%%-*- mode: erlang -*-

%% @doc How Riak will repair out-of-sync keys. Some features require
%% this to be set to 'active', including search.
%%
%% * active: out-of-sync keys will be repaired in the background
%% * passive: out-of-sync keys are only repaired on read
%% * active-debug: like active, but outputs verbose debugging
%%   information
{mapping, "anti_entropy", "riak_kv.anti_entropy", [
  {default, active},
  {datatype, {enum, [active, passive, 'active-debug']}}
]}.

%% Convert the configured mode into the {on | off, Options} tuple stored
%% under riak_kv.anti_entropy.
{translation,
 "riak_kv.anti_entropy",
 fun(Conf) ->
    case cuttlefish:conf_get("anti_entropy", Conf) of
      passive -> {off, []};
      'active-debug' -> {on, [debug]};
      %% `active`, and any other value, switches AAE on with no options
      _ActiveOrOther -> {on, []}
    end
  end
}.

%% @doc Sets the Tictac AAE mode; one of `active` or `passive`.
%% Default is passive.
{mapping, "tictacaae_active", "riak_kv.tictacaae_active", [
  {datatype, {enum, [active, passive]}},
  {default, passive}
]}.

%% @doc Use hashtree tokens for anti-entropy throttling
%% To hold up the vnode when there is a backlog of activity on the AAE store,
%% a hashtree token bucket may be used to block the vnode every 90 puts until
%% the PUT has been completed.  This uses aae_ping with tictac_aae, and a full
%% sync block with legacy anti-entropy
{mapping, "aae_tokenbucket", "riak_kv.aae_tokenbucket", [
  {datatype, {flag, enabled, disabled}},
  {default, enabled},
  {commented, enabled}
]}.

%% @doc A path under which the Tictac AAE data files will be stored.
{mapping, "tictacaae_dataroot", "riak_kv.tictacaae_dataroot", [
  {default, "$(platform_data_dir)/tictac_aae"},
  {datatype, directory}
]}.

%% @doc A path under which the eraser overload queue will be stored.
{mapping, "eraser_dataroot", "riak_kv.eraser_dataroot", [
  {default, "$(platform_data_dir)/kv_eraser"},
  {datatype, directory}
]}.

%% @doc A path under which the reaper overload queue will be stored.
{mapping, "reaper_dataroot", "riak_kv.reaper_dataroot", [
  {default, "$(platform_data_dir)/kv_reaper"},
  {datatype, directory}
]}.

%% @doc A path under which the reader overload queue will be stored.
{mapping, "reader_dataroot", "riak_kv.reader_dataroot", [
  {default, "$(platform_data_dir)/kv_reader"},
  {datatype, directory}
]}.

%% @doc A path under which the repl real-time overload queue will be stored.
{mapping, "replrtq_dataroot", "riak_kv.replrtq_dataroot", [
  {default, "$(platform_data_dir)/kv_replrtqsrc"},
  {datatype, directory}
]}.

%% @doc The maximum size of the overflow queue for the eraser.  If the queue
%% goes beyond this point, new additions will be discarded.
%% To update this at run-time, the configuration item can be changed via
%% remote_console, then `riak_kv_eraser:clear_queue()` can be called to clear
%% the existing queue and start a new queue with the new limit.
{mapping, "eraser_overflow_limit", "riak_kv.eraser_overflow_limit", [
  {datatype, integer},
  {default, 10000000}
]}.

%% @doc The maximum size of the overflow queue for the reaper.  If the queue
%% goes beyond this point, new additions will be discarded.
%% To update this at run-time, the configuration item can be changed via
%% remote_console, then `riak_kv_reaper:clear_queue()` can be called to clear
%% the existing queue and start a new queue with the new limit.
{mapping, "reaper_overflow_limit", "riak_kv.reaper_overflow_limit", [
  {datatype, integer},
  {default, 10000000}
]}.

%% @doc The maximum size of the overflow queue for the real-time replication
%% queue (nextgen repl only).  If the queue goes beyond this point, new
%% additions will be discarded.
%% For replrtq the queue is sized per QueueName (and priority), so differently
%% named queues on the host will each independently inherit this limit, for
%% each of the queue priorities.
%% To update this at run-time, the configuration item can be changed via
%% remote_console, then `riak_kv_replrtq_src:clear_rtq(QueueName)` can be
%% called to clear the existing queue and start a new queue with the new limit.
{mapping, "replrtq_overflow_limit", "riak_kv.replrtq_overflow_limit", [
  {datatype, integer},
  {default, 10000000}
]}.

%% @doc Parallel key store type
%% When running in parallel mode, which will be the default if the backend does
%% not support native tictac aae (i.e. is not leveled), what type of parallel
%% key store should be kept - leveled_ko (leveled and key-ordered), or
%% leveled_so (leveled and segment ordered).
%% When running in native mode, this setting is ignored
{mapping, "tictacaae_parallelstore", "riak_kv.tictacaae_parallelstore", [
  {datatype, {enum, [leveled_ko, leveled_so]}},
  {default, leveled_ko},
  {commented, leveled_ko}
]}.

%% @doc Minimum Rebuild Wait
%% The minimum number of hours to wait between rebuilds.  Default value is 2
%% weeks
{mapping, "tictacaae_rebuildwait", "riak_kv.tictacaae_rebuildwait", [
  {datatype, integer},
  {default, 336}
]}.

%% @doc Maximum Rebuild Delay
%% The number of seconds which represents the length of the period in which the
%% next rebuild will be scheduled.  So if all vnodes are scheduled to rebuild
%% at the same time, they will actually rebuild randomly between 0 and this
%% value (in seconds) after the rebuild time. Default value is 4 days
{mapping, "tictacaae_rebuilddelay", "riak_kv.tictacaae_rebuilddelay", [
  {datatype, integer},
  {default, 345600}
]}.

%% @doc Store heads in parallel key stores
%% If running a parallel key store, the whole "head" object may be stored to
%% allow for fold_heads queries to be run against the parallel store.
%% Alternatively, the cost of the parallel key store can be reduced by storing
%% only a minimal data set necessary for AAE and monitoring
{mapping, "tictacaae_storeheads", "riak_kv.tictacaae_storeheads", [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  {commented, disabled}
]}.

%% @doc Frequency to prompt exchange per vnode
%% The number of milliseconds which the vnode must wait between self-pokes to
%% maybe prompt the next exchange. Default is 8 minutes - check all partitions
%% when n=3 once every hour (in each direction).  A cycle of exchanges will
%% take (n - 1) * n + 1 exchange ticks for each nval.
%% Note if this is to be reduced further the riak_core vnode_inactivity_timeout
%% should also be reduced or handoffs may be blocked.  To be safe the
%% vnode_inactivity_timeout must be < 0.5 * the tictacaae_exchangetick.
{mapping, "tictacaae_exchangetick", "riak_kv.tictacaae_exchangetick", [
  {datatype, integer},
  {default, 480000},
  hidden
]}.

%% @doc Frequency to prompt rebuild check per vnode
%% The number of milliseconds which the vnode must wait between self-pokes to
%% maybe prompt the next rebuild. Default is 60 minutes.
%% When a node is being re-introduced to a cluster following a long delay, then
%% increase this tick prior to the reintroduction.  This will reduce
%% the concurrency of some activity e.g. handoffs and rebuilds
{mapping, "tictacaae_rebuildtick", "riak_kv.tictacaae_rebuildtick", [
  {datatype, integer},
  {default, 3600000},
  hidden
]}.

%% @doc Max number of leaf IDs per exchange
%% To control the length of time for each exchange, only a subset of the
%% conflicting leaves will be compared on each exchange.  If there are issues
%% with query timeouts this may be halved.  Large backlogs may be reduced
%% faster by doubling.  There are 1M segments in a standard tree overall.
%% Performance tuning can also be made by adjusting the `tictacaae_repairloops`
%% and `tictacaae_rangeboost` - but `tictacaae_maxresults` is the simplest
%% factor that is likely to result in a relatively predictable (and linear)
%% outcome in terms of both CPU cost and repair speed.
{mapping, "tictacaae_maxresults", "riak_kv.tictacaae_maxresults", [
  {datatype, integer},
  {default, 64}
]}.

%% @doc Max number of repair loops per exchange
%% Each exchange will attempt a repair of tictacaae_maxresults, and will
%% analyse those repairs to see if there exists the potential for more repairs.
%% If there exists this potential, then repair loops will be run, but in these
%% repair loops a more efficient repair will be run:
%% - with a bucket, key_range or last_modified range (at least) to restrict the
%% scope of the fetch_clocks queries
%% - without rebuilding the segments in the AAE hash tree
%% - without checking for presence of the object in the journal
%% Problems related to invalid hash trees will need to be detected through the
%% initial loop of the exchange, not repair loops
{mapping, "tictacaae_repairloops", "riak_kv.tictacaae_repairloops", [
  {datatype, integer},
  {default, 4},
  hidden
]}.

%% @doc Multiplier to the `tictacaae_maxresults` when following an initial AAE
%% exchange with a range-limited exchange.
%% After each exchange, where sufficient deltas are discovered there will be a
%% `tictacaae_repairloops` number of range-limited queries (assuming
%% sufficient results continue to be found).  Each of these may have the
%% number of max results boosted by this integer factor.
%% For example, if `tictacaae_maxresults` is set to 64, and
%% `tictacaae_repairloops` is set to 4, and the `tictacaae_rangeboost` is set
%% to 2 - the initial loop will use `tictacaae_maxresults` of 64, but any
%% AAE exchanges on loops 1 to 4 will use 128.
%% Exchanges with range-limited queries are more efficient, and so more tree
%% segments can be fetched without creating significant CPU overheads, hence
%% the use of this boost to maxresults.
{mapping, "tictacaae_rangeboost", "riak_kv.tictacaae_rangeboost", [
  {datatype, integer},
  {default, 2},
  hidden
]}.

%% @doc Exchange only between primary vnodes
%% Setting this to off allows Tictac AAE exchanges between both primary and
%% fallback vnodes.
{mapping, "tictacaae_primaryonly", "riak_kv.tictacaae_primaryonly", [
  {datatype, flag},
  {default, on},
  hidden
]}.


%% @doc Pool Strategy - should a single node_worker_pool or multiple pools be
%% used for queueing potentially longer-running "background" queries
{mapping, "worker_pool_strategy", "riak_kv.worker_pool_strategy", [
  {datatype, {enum, [none, single, dscp]}},
  {default, dscp},
  {commented, dscp}
]}.

%% @doc Pool Sizes - sizes for individual node_worker_pools
%% Only relevant if single or dscp strategy chosen.  Set
%% `node_worker_pool_size` if a `single` pool strategy is being used, or set
%% `af1_worker_pool_size` to `af4_worker_pool_size` and `be_worker_pool_size`
%% if a multiple pool strategy has been chosen.
%% Separate assured forwarding (af) pools will be used for informational
%% aae_folds (find_keys, object_stats) and functional folds
%% (merge_tree_range, fetch_clock_range).  The be_pool is used only for tictac
%% AAE rebuilds at present
{mapping, "node_worker_pool_size", "riak_kv.node_worker_pool_size", [
  {datatype, integer},
  {default, 4}
]}.
{mapping, "af1_worker_pool_size", "riak_kv.af1_worker_pool_size", [
  {datatype, integer},
  {default, 2}
]}.
{mapping, "af2_worker_pool_size", "riak_kv.af2_worker_pool_size", [
  {datatype, integer},
  {default, 1}
]}.
{mapping, "af3_worker_pool_size", "riak_kv.af3_worker_pool_size", [
  {datatype, integer},
  {default, 4}
]}.
{mapping, "af4_worker_pool_size", "riak_kv.af4_worker_pool_size", [
  {datatype, integer},
  {default, 1}
]}.
{mapping, "be_worker_pool_size", "riak_kv.be_worker_pool_size", [
  {datatype, integer},
  {default, 1}
]}.


%% @doc Backend PUT Pause (ms).
%% If the backend PUT has resulted in a pause request, then how long should
%% the vnode pause for?  This is measured in ms, and currently only applies
%% to the leveled backend
{mapping, "backend_pause_ms", "riak_kv.backend_pause_ms", [
  {datatype, integer},
  {default, 10},
  {commented, 10}
]}.

%% @doc Tombstone Pause (ms)
%% The pause has a dual-purpose.  It slows both the riak_kv_reaper and
%% riak_kv_eraser process, as the pause is imposed on each delete or reap.
%% The pause also reduces the probability of a failure to reap when a non-keep
%% delete_mode is used, allowing for more time for the tombstone to propagate.
%% The pause is in milliseconds.
{mapping, "tombstone_pause", "riak_kv.tombstone_pause", [
  {datatype, integer},
  {default, 2},
  {commented, 2}
]}.

%% @doc Whether to allow node to participate in coverage queries.
%% This is used as a manual switch to stop nodes in incomplete states
%% (e.g. doing a full partition repair, or node replace) from participating
%% in coverage queries, as their information may be incomplete (e.g. 2i
%% issues seen in these circumstances).
{mapping, "participate_in_coverage", "riak_core.participate_in_coverage", [
    {datatype, {flag, enabled, disabled}},
    {default, enabled},
    {commented, enabled}
]}.

%% @doc Specifies the storage engine used for Riak's key-value data
%% and secondary indexes (if supported).
{mapping, "storage_backend", "riak_kv.storage_backend", [
  {datatype, {enum, [bitcask, leveldb, leveled, memory, multi, prefix_multi]}},
  {default, {{storage_backend}} }
]}.

%% Map the short backend name onto the riak_kv module atom stored in
%% riak_kv.storage_backend.
{translation,
 "riak_kv.storage_backend",
 fun(Conf) ->
    case cuttlefish:conf_get("storage_backend", Conf) of
      leveldb -> riak_kv_eleveldb_backend;
      leveled -> riak_kv_leveled_backend;
      memory -> riak_kv_memory_backend;
      multi -> riak_kv_multi_backend;
      prefix_multi -> riak_kv_multi_prefix_backend;
      %% `bitcask`, and any other value, selects the bitcask backend
      _BitcaskOrOther -> riak_kv_bitcask_backend
    end
 end}.

%% @doc Simplify prefix_multi configuration for Riak CS. Keep this
%% commented out unless Riak is configured for Riak CS.
{mapping, "cs_version", "riak_kv.riak_cs_version", [
  {datatype, integer},
  {commented, 020000},
  {validators, ["verify_cs_backend"]}
]}.

%% Accept only integer versions of 20000 or greater.
{validator,
 "verify_cs_backend",
 "must be later than CS 2.0.0",
 fun(Value) ->
    is_integer(Value) andalso Value >= 20000
 end}.

%% @doc Restrict how fast AAE can build hash trees. Building the tree
%% for a given partition requires a full scan over that partition's
%% data. Once built, trees stay built until they are expired.
%% * .number is the number of builds
%% * .per_timespan is the amount of time in which that .number of builds
%%   occurs
%%
%% Default is 1 build per hour.
{mapping, "anti_entropy.tree.build_limit.number", "riak_kv.anti_entropy_build_limit", [
  {default, 1},
  {datatype, integer},
  hidden
]}.

%% @see anti_entropy.tree.build_limit.number
{mapping, "anti_entropy.tree.build_limit.per_timespan", "riak_kv.anti_entropy_build_limit", [
  {default, "1h"},
  {datatype, {duration, ms}},
  hidden
]}.

%% Combine the two build_limit settings into one {Number, Timespan} tuple.
{translation,
 "riak_kv.anti_entropy_build_limit",
 fun(Conf) ->
    Builds = cuttlefish:conf_get("anti_entropy.tree.build_limit.number", Conf),
    Timespan = cuttlefish:conf_get("anti_entropy.tree.build_limit.per_timespan", Conf),
    {Builds, Timespan}
 end}.

%% @doc Determine how often hash trees are expired after being built.
%% Periodically expiring a hash tree ensures the on-disk hash tree
%% data stays consistent with the actual k/v backend data. It also
%% helps Riak identify silent disk failures and bit rot. However,
%% expiration is not needed for normal AAE operation and should be
%% infrequent for performance reasons. The value is given as a
%% duration (for example 1w) and converted to milliseconds; `never`
%% is also accepted.
{mapping, "anti_entropy.tree.expiry", "riak_kv.anti_entropy_expire", [
  {default, "1w"},
  {datatype, [{duration, ms}, {atom, never}]},
  hidden
]}.

%% @doc Limit how many AAE exchanges or builds can happen concurrently.
{mapping, "anti_entropy.concurrency_limit", "riak_kv.anti_entropy_concurrency", [
  {default, 2},
  {datatype, integer},
  hidden
]}.

%% @doc The tick determines how often the AAE manager looks for work
%% to do (building/expiring trees, triggering exchanges, etc).
%% The default is every 15 seconds. Lowering this value will
%% speed up the rate at which all replicas are synced across the cluster.
%% Increasing the value is not recommended.
{mapping, "anti_entropy.trigger_interval", "riak_kv.anti_entropy_tick", [
  {default, "15s"},
  {datatype, {duration, ms}},
  hidden
]}.

%% @doc The directory where AAE hash trees are stored.
{mapping, "anti_entropy.data_dir", "riak_kv.anti_entropy_data_dir", [
  {default, "$(platform_data_dir)/anti_entropy"},
  hidden,
  {datatype, directory}
]}.

%% @doc The LevelDB options used by AAE to generate the LevelDB-backed
%% on-disk hashtrees.
%% @see leveldb.write_buffer_size
{mapping, "anti_entropy.write_buffer_size", "riak_kv.anti_entropy_leveldb_opts.write_buffer_size", [
  {default, "4MB"},
  {datatype, bytesize},
  hidden
]}.

{mapping, "anti_entropy.max_open_files", "riak_kv.anti_entropy_leveldb_opts.max_open_files", [
  {default, 20},
  {datatype, integer},
  hidden
]}.

%% @doc Whether the distributed throttle for active anti-entropy is
%% enabled.
{mapping, "anti_entropy.throttle", "riak_kv.aae_throttle_enabled", [
  {default, on},
  {datatype, flag},
  hidden
]}.

%% @doc Sets the throttling tiers for active anti-entropy. Each tier
%% is a minimum vnode mailbox size and a time-delay that the throttle
%% should observe at that size and above. For example:
%%
%%     anti_entropy.throttle.tier1.mailbox_size = 0
%%     anti_entropy.throttle.tier1.delay = 0ms
%%     anti_entropy.throttle.tier2.mailbox_size = 40
%%     anti_entropy.throttle.tier2.delay = 5ms
%%
%% If configured, there must be a tier which includes a mailbox size
%% of 0. Both .mailbox_size and .delay must be set for each tier.
%% @see anti_entropy.throttle
{mapping, "anti_entropy.throttle.$tier.mailbox_size", "riak_kv.aae_throttle_limits", [
  {datatype, integer},
  {validators, ["non_negative"]},
  hidden
]}.

%% @see anti_entropy.throttle.$tier.mailbox_size
{mapping, "anti_entropy.throttle.$tier.delay", "riak_kv.aae_throttle_limits", [
  {datatype, {duration, ms}},
  hidden
]}.

{validator,
 "non_negative",
 "must be greater than or equal to 0",
 fun(Value) when Value >= 0 -> true;
    (_) -> false
 end}.

%% The translation fun is not written inline: it is whatever
%% riak_core_throttle:create_limits_translator_fun/2 returns when called with
%% the "anti_entropy" prefix and the "mailbox_size" setting name.
%% NOTE(review): presumably it folds the anti_entropy.throttle.$tier.*
%% settings into the riak_kv.aae_throttle_limits value - confirm against
%% riak_core_throttle, which is not visible from this file.
{translation,
 "riak_kv.aae_throttle_limits",
 riak_core_throttle:create_limits_translator_fun("anti_entropy", "mailbox_size")
}.

%% @see leveldb.bloomfilter
{mapping, "anti_entropy.bloomfilter", "riak_kv.anti_entropy_leveldb_opts.use_bloomfilter", [
  {default, on},
  {datatype, flag},
  hidden
]}.

%% We left riak_kv.add_paths out on purpose.

%% @doc The maximum number of concurrent requests of each type (get or
%% put) that is allowed. Setting this value to infinite disables
%% overload protection. The 'erlang.process_limit' should be at least
%% 3 times more than this setting.
%% @see erlang.process_limit
{mapping, "max_concurrent_requests", "riak_kv.fsm_limit", [
  {datatype, [integer, {atom, infinite}]},
  {default, 50000},
  hidden
]}.

%% `infinite` is stored as undefined; an integer limit is passed through
%% unchanged; anything else is rejected.
{translation, "riak_kv.fsm_limit",
 fun(Conf) ->
  case cuttlefish:conf_get("max_concurrent_requests", Conf) of
      infinite ->
          undefined;
      Limit when is_integer(Limit) ->
          Limit;
      _Other ->
          cuttlefish:invalid("max_concurrent_requests must be an integer or 'infinite'")
  end
 end
}.

%% @doc If forwarding to a replica-local coordinator on PUT fails,
%% this setting will retry the operation when set to 'on'.
%%   * on = Riak 2.0 behavior (strongly recommended)
%%   * off = Riak 1.x behavior
{mapping, "retry_put_coordinator_failure", "riak_kv.retry_put_coordinator_failure", [
  {datatype, flag},
  {default, on},
  hidden
]}.

%% @doc Enable or disable mbox_check on PUTs
%% By default mbox_checks are performed so that vnodes with long queues
%% will not be used as put coordinators.  To return to the pre-2.9 behaviour
%% of not checking, this can be disabled.  The mbox_check may add additional
%% latency to PUTs in some environments.
{mapping, "mbox_check_enabled", "riak_kv.mbox_check_enabled", [
  {datatype, flag},
  {default, on},
  hidden
]}.

%% @doc Controls which binary representation of a riak value is stored
%% on disk.
%% * 0: Original erlang:term_to_binary format. Higher space overhead.
%% * 1: New format for more compact storage of small values.
%% If using the leveled backend object_format 1 will always be used, when
%% persisting data into the backend - even if 0 has been configured here
{mapping, "object.format", "riak_kv.object_format", [
  {datatype, [{integer, 1}, {integer, 0}]},
  {default, 1}
]}.

%% Turn the numeric format version into the v0 / v1 atom stored under
%% riak_kv.object_format.
{translation, "riak_kv.object_format",
 fun(Conf) ->
   FormatVersion = cuttlefish:conf_get("object.format", Conf),
   case FormatVersion of
       1 -> v1;
       0 -> v0;
       _ -> cuttlefish:invalid("invalid object format version")
   end
 end
}.

%% @doc Controls the size of the metadata cache for each vnode. Set to
%% 'off' to disable the cache.  This shouldn't be necessary for on-disk
%% based backends, but can help performance in some cases (i.e. memory
%% backend, data fits in block cache, etc). Note that this is the size
%% of the ETS table, rather than the actual data, to keep the size
%% calculation simple, thus more space may be used than the simple
%% size * vnode_count calculation would imply.
%%
%% Caution: Do not use without extensive benchmarking.
{mapping, "metadata_cache_size", "riak_kv.vnode_md_cache_size", [
  {datatype, [{atom, off}, bytesize]},
  {default, off}, %% disabled by default, 256KB is a reasonable value
  hidden
]}.

%% `off` is stored as a cache size of 0; any other value is passed through.
{translation,
 "riak_kv.vnode_md_cache_size",
 fun(Conf) ->
   CacheSize = cuttlefish:conf_get("metadata_cache_size", Conf),
   case CacheSize =:= off of
       true -> 0;
       false -> CacheSize
   end
 end
}.

%%%% Memory backend section
%% @doc The maximum amount of memory consumed per vnode by the memory
%% storage backend.  Minimum: 1MB
{mapping, "memory_backend.max_memory_per_vnode", "riak_kv.memory_backend.max_memory", [
  {datatype, bytesize},
  hidden
]}.

%% @see memory_backend.max_memory_per_vnode
{mapping, "multi_backend.$name.memory_backend.max_memory_per_vnode", "riak_kv.multi_backend", [
  {datatype, bytesize},
  hidden
]}.

%% Divide the configured byte count by 1048576 (bytes per MB) and pass the
%% result through cuttlefish_util:ceiling/1.
{translation,
 "riak_kv.memory_backend.max_memory",
 fun(Conf) ->
  BytesPerMB = 1048576,
  MaxBytes = cuttlefish:conf_get("memory_backend.max_memory_per_vnode", Conf),
  cuttlefish_util:ceiling(MaxBytes / BytesPerMB)
 end
}.

%% @doc Each value written will be written with this "time to
%% live". Once that object's time is up, it will be deleted on the
%% next read of its key. Minimum: 1s
{mapping, "memory_backend.ttl", "riak_kv.memory_backend.ttl", [
  {datatype, {duration, s}},
  hidden
]}.

%% @see memory_backend.ttl
{mapping, "multi_backend.$name.memory_backend.ttl", "riak_kv.multi_backend", [
  {datatype, {duration, s}},
  hidden
]}.

%% @doc Measures were added to Riak 1.2 to counteract cross-site
%% scripting and request-forgery attacks. Some reverse-proxies cannot
%% remove the Referer header and make serving data directly from Riak
%% impossible. Turning secure_referer_check = off disables this
%% security check.
{mapping, "secure_referer_check", "riak_kv.secure_referer_check", [
  {datatype, flag},
  {default, on},
  hidden
]}.

%% @doc A dummy value added in Riak 2.2.5 to handle upgrades from
%% older riak_ee installs that have `jmx` in their riak.conf
{mapping, "jmx", "riak_kv.jmx_dummy", [
  {datatype, flag},
  {default, off},
  hidden
]}.

%% @doc Reading or writing objects bigger than this size will write a
%% warning in the logs.
{mapping, "object.size.warning_threshold", "riak_kv.warn_object_size", [
  {datatype, bytesize},
  {default, "5MB"}
]}.

%% @doc Writing an object bigger than this will send a failure to the
%% client.
{mapping, "object.size.maximum", "riak_kv.max_object_size", [
  {datatype, bytesize},
  {default, "50MB"}
]}.

%% @doc Writing an object with more than this number of siblings will
%% generate a warning in the logs.
{mapping, "object.siblings.warning_threshold", "riak_kv.warn_siblings", [
  {datatype, integer},
  {default, 25}
]}.

%% @doc Writing an object with more than this number of siblings will
%% send a failure to the client.
{mapping, "object.siblings.maximum", "riak_kv.max_siblings", [
  {datatype, integer},
  {default, 100}
]}.

%% @doc The strategy used when merging objects that potentially have
%% conflicts.
%%
%% * 2: Riak 2.0 typed bucket default - reduces sibling creation through additional
%%      metadata on each sibling (also known as dotted version vectors)
%% * 1: Riak 1.4, default buckets, and earlier default - may duplicate siblings
%%      from interleaved writes (sibling explosion.)
{mapping, "buckets.default.merge_strategy", "riak_core.default_bucket_props.dvv_enabled", [
  {datatype, {flag, '2', '1'}},
  {default, '1'},
  hidden
]}.

%% @doc The number of primary replicas (non-fallback) that must reply
%% to a read request.
{mapping, "buckets.default.pr", "riak_core.default_bucket_props.pr", [
  {datatype, [integer, {enum, [quorum, all]}]},
  {default, 0},
  hidden
]}.

%% @doc The number of replicas which must reply to a read request.
{mapping, "buckets.default.r", "riak_core.default_bucket_props.r", [
  {datatype, [{enum, [quorum, all]}, integer]},
  {default, quorum},
  hidden
]}.

%% @doc The number of replicas which must reply to a write request,
%% indicating that the write was received.
{mapping, "buckets.default.w", "riak_core.default_bucket_props.w", [
  {datatype, [{enum, [quorum, all]}, integer]},
  {default, quorum},
  hidden
]}.

%% @doc The number of primary replicas (non-fallback) which must reply
%% to a write request.
{mapping, "buckets.default.pw", "riak_core.default_bucket_props.pw", [
  {datatype, [integer, {enum, [quorum, all]}]},
  {default, 0},
  hidden
]}.

%% @doc The number of replicas which must reply to a write request,
%% indicating that the write was committed to durable storage.
{mapping, "buckets.default.dw", "riak_core.default_bucket_props.dw", [
  {datatype, [{enum, [quorum, all]}, integer]},
  {default, quorum},
  hidden
]}.

%% @doc The number of replicas which must reply to a delete request.
{mapping, "buckets.default.rw", "riak_core.default_bucket_props.rw", [
  {datatype, [{enum, [quorum, all]}, integer]},
  {default, quorum},
  hidden
]}.

%% @doc Whether not-founds will count toward a quorum of reads.
{mapping, "buckets.default.notfound_ok", "riak_core.default_bucket_props.notfound_ok", [
  {datatype, {enum, [true, false]}},
  {default, true},
  hidden
]}.

%% @doc Whether not-founds will invoke the "basic quorum"
%% optimization. This setting will short-circuit fetches where the
%% majority of replicas report that the key is not found. Only used
%% when notfound_ok = false.
{mapping, "buckets.default.basic_quorum", "riak_core.default_bucket_props.basic_quorum", [
  {datatype, {enum, [true, false]}},
  {default, false},
  hidden
]}.

%% @doc Whether or not siblings are allowed, by default, for untyped buckets.
%% Note: See Vector Clocks for a discussion of sibling resolution.
{mapping, "buckets.default.allow_mult", "riak_core.default_bucket_props.allow_mult", [
  {datatype, {enum, [true, false]}},
  {default, false},
  hidden
]}.

%% @doc Whether conflicting writes resolve via timestamp.
{mapping, "buckets.default.last_write_wins", "riak_core.default_bucket_props.last_write_wins", [
  {datatype, {enum, [true, false]}},
  {default, false},
  hidden
]}.

%% @doc A space delimited list of functions that will be run before a
%% value is stored, and that can abort the write. For Erlang
%% functions, use "module:function" and for JavaScript, use
%% "functionName".
{mapping, "buckets.default.precommit", "riak_core.default_bucket_props.precommit", [hidden]}.

%% Split the space-delimited setting into hooks and turn each one into the
%% {struct, ...} form stored in the bucket properties.
{translation, "riak_core.default_bucket_props.precommit",
 fun(Conf) ->
  ToHook =
    fun(Hook) ->
      case string:tokens(Hook, ":") of
          %% JavaScript hook: {struct, [{<<"name">>, <<"SomeJS.nonsense">>}]}
          [JsName] ->
              {struct, [{<<"name">>, list_to_binary(JsName)}]};
          %% Erlang hook: {struct, [{<<"mod">>, <<"module">>}, {<<"fun">>, <<"function">>}]}
          [Mod, Fun] ->
              {struct, [{<<"mod">>, list_to_binary(Mod)},
                        {<<"fun">>, list_to_binary(Fun)}]};
          _ ->
              cuttlefish:invalid("incorrect hook format '" ++ Hook ++ "'")
      end
    end,
  Setting = cuttlefish:conf_get("buckets.default.precommit", Conf, []),
  [ToHook(Hook) || Hook <- string:tokens(Setting, " ")]
 end
}.

%% @doc A space delimited list of functions that will be run after a
%% value is stored. Only Erlang functions are allowed, using the
%% "module:function" format.
{mapping, "buckets.default.postcommit", "riak_core.default_bucket_props.postcommit", [hidden]}.

%% Split the space-delimited setting into "module:function" hooks and turn
%% each one into the {struct, ...} form stored in the bucket properties.
{translation, "riak_core.default_bucket_props.postcommit",
 fun(Conf) ->
   ToHook =
     fun(Hook) ->
       case string:tokens(Hook, ":") of
           [Mod, Fun] ->
               {struct, [{<<"mod">>, list_to_binary(Mod)},
                         {<<"fun">>, list_to_binary(Fun)}]};
           _ ->
               cuttlefish:invalid("incorrect hook format '" ++ Hook ++ "'")
       end
     end,
   Setting = cuttlefish:conf_get("buckets.default.postcommit", Conf, []),
   [ToHook(Hook) || Hook <- string:tokens(Setting, " ")]
 end
}.

%% @doc Whether serialized datatypes will use compression, and at what
%% level. When an integer, this refers to the aggressiveness (and
%% slowness) of compression, on a scale from 0 to 9. 'on' is
%% equivalent to 6, 'off' is equivalent to 0.
{mapping, "datatypes.compression_level", "riak_dt.binary_compression", [
  {datatype, [integer, flag]},
  {default, 1},
  {validators, ["is_compression_value"]},
  hidden
]}.

%% Accept a boolean (the converted on/off flag) or an integer from 0 to 9.
{validator, "is_compression_value", "must be on/off or a value between 0 and 9",
 fun(Flag) when is_boolean(Flag) -> true;
    (Level) when is_integer(Level), Level >= 0, Level =< 9 -> true;
    (_) -> false
 end}.

%% @doc Whether to use the background manager to limit KV handoff.
%% This will help to prevent system response degradation under times
%% of heavy load from multiple background tasks that contend for the
%% same resources.
%% @see background_manager
{mapping, "handoff.use_background_manager", "riak_kv.handoff_use_background_manager", [
  {datatype, flag},
  {default, off},
  hidden
]}.

%% @doc The maximum number of times that a secondary system like Riak
%% Search 2.0 can block handoff of primary key-value data. The
%% approximate maximum duration handoff of a vnode can be blocked for
%% can be determined by multiplying this number by the value of
%% "vnode_management_timer". To prevent handoff from ever being
%% blocked by a secondary system set this value to 0.
%% @see vnode_management_timer
{mapping, "handoff.max_rejects", "riak_kv.handoff_rejected_max", [
  {datatype, integer},
  {default, 6},
  hidden
]}.

%% @doc Whether to use the background manager to limit AAE tree
%% rebuilds. This will help to prevent system response degradation
%% under times of heavy load from multiple background tasks that
%% contend for the same resources.
%% @see background_manager
{mapping, "anti_entropy.use_background_manager", "riak_kv.aae_use_background_manager", [
  {datatype, flag},
  {default, off},
  hidden
]}.

%% @doc Time in between the checks that trigger Bitcask merges.
{mapping, "bitcask.merge_check_interval", "riak_kv.bitcask_merge_check_interval", [
  {datatype, {duration, ms}},
  {default, "3m"},
  hidden
]}.

%% @doc Jitter used to randomize the time in between the checks that trigger
%% Bitcask merges.
{mapping, "bitcask.merge_check_jitter", "riak_kv.bitcask_merge_check_jitter", [
  {datatype, {percent, float}},
  {default, "30%"},
  hidden
]}.

%% @doc Maximum amount of data to merge in one go in the Bitcask backend.
{mapping, "bitcask.max_merge_size", "riak_kv.bitcask_max_merge_size", [
  {datatype, bytesize},
  {default, "100GB"},
  hidden
]}.

%% @doc Whether to allow list buckets.
{mapping, "cluster.job.riak_kv.list_buckets", "riak_core.job_accept_class", [
    merge,
    {datatype, {flag, enabled, disabled}},
    {default, enabled},
    {commented, enabled}
]}.

%% @doc Permit (enabled) or refuse (disabled) streaming list buckets
%% operations.
{mapping,
 "cluster.job.riak_kv.stream_list_buckets",
 "riak_core.job_accept_class",
 [
  merge,
  {datatype, {flag, enabled, disabled}},
  {default, enabled},
  {commented, enabled}
 ]}.

%% @doc Permit (enabled) or refuse (disabled) list keys operations.
{mapping,
 "cluster.job.riak_kv.list_keys",
 "riak_core.job_accept_class",
 [
  merge,
  {datatype, {flag, enabled, disabled}},
  {default, enabled},
  {commented, enabled}
 ]}.

%% @doc Permit (enabled) or refuse (disabled) streaming list keys operations.
{mapping,
 "cluster.job.riak_kv.stream_list_keys",
 "riak_core.job_accept_class",
 [
  merge,
  {datatype, {flag, enabled, disabled}},
  {default, enabled},
  {commented, enabled}
 ]}.

%% @doc Permit (enabled) or refuse (disabled) secondary index queries.
{mapping,
 "cluster.job.riak_kv.secondary_index",
 "riak_core.job_accept_class",
 [
  merge,
  {datatype, {flag, enabled, disabled}},
  {default, enabled},
  {commented, enabled}
 ]}.

%% @doc Permit (enabled) or refuse (disabled) streaming secondary index
%% queries.
{mapping,
 "cluster.job.riak_kv.stream_secondary_index",
 "riak_core.job_accept_class",
 [
  merge,
  {datatype, {flag, enabled, disabled}},
  {default, enabled},
  {commented, enabled}
 ]}.

%% @doc Permit (enabled) or refuse (disabled) term-based map-reduce.
{mapping,
 "cluster.job.riak_kv.map_reduce",
 "riak_core.job_accept_class",
 [
  merge,
  {datatype, {flag, enabled, disabled}},
  {default, enabled},
  {commented, enabled}
 ]}.

%% @doc Permit (enabled) or refuse (disabled) JavaScript map-reduce.
{mapping,
 "cluster.job.riak_kv.map_reduce_js",
 "riak_core.job_accept_class",
 [
  merge,
  {datatype, {flag, enabled, disabled}},
  {default, enabled},
  {commented, enabled}
 ]}.


%% @doc The scope of Tictac full-sync: should all data be sync'd (all), a
%% specific bucket (bucket), or a specific bucket type (type).
%% Note that in most cases a sync of all data has a lower overhead than a
%% sync of a subset of the data - as cached AAE trees will be used.
%% TODO: type is not yet implemented.
{mapping,
 "ttaaefs_scope",
 "riak_kv.ttaaefs_scope",
 [
  {datatype, {enum, [all, bucket, type, disabled]}},
  {default, disabled}
 ]}.

%% @doc For Tictac full-sync, the registered queue name on this cluster that
%% should be used for passing references to data which needs to be replicated
%% for AAE full-sync. This queue name must be defined as a
%% `riak_kv.replq<n>_queuename`, but need not be exclusive to full-sync (i.e.
%% a real-time replication queue may be used as well).
{mapping,
 "ttaaefs_queuename",
 "riak_kv.ttaaefs_queuename",
 [
  {datatype, atom},
  {default, q1_ttaaefs}
 ]}.

%% @doc The queue name of the remote peer that this cluster is consuming
%% from. Tictac full-sync repair is by default uni-directional - each
%% full-sync operation managed from a given cluster will only attempt to
%% prompt repair where that cluster is in advance (i.e. it is the src of a
%% more advanced value).
%% From Riak 3.0.10 repair can be bi-directional: by configuring the
%% queue name of the remote peer here, repairs will be prompted in both
%% directions. This reduces the waste associated with discovery work that
%% does not lead to repair activity.
%% To revert to the pre-3.0.10 uni-directional behaviour use the keyword
%% disabled as the peer queue name.
{mapping,
 "ttaaefs_queuename_peer",
 "riak_kv.ttaaefs_queuename_peer",
 [
  {datatype, atom},
  {default, disabled},
  {commented, q1_ttaaefs}
 ]}.

%% @doc The Tictac cluster slice number. Each cluster can be configured with
%% a slice number in order to space out full-sync events. The numbers can be
%% 1..4. For example, when doing bi-directional replication give one cluster
%% a slice number of 1 and the other a slice number of 3 (or alternatively
%% use 2 and 4).
{mapping,
 "ttaaefs_cluster_slice",
 "riak_kv.ttaaefs_cluster_slice",
 [
  {datatype, integer},
  {default, 1}
 ]}.

%% @doc For Tictac full-sync, the maximum number of AAE segments to be
%% compared per exchange. Reducing this will speed up clock compare queries,
%% but will increase the number of exchanges required to complete a repair.
%% If range_check is used to speed up repairs this can be reduced, as the
%% range_check maxresults will be boosted by ttaaefs_rangeboost. When using
%% range_check a value of 64 is recommended, which may be reduced to 32 or 16
%% if the cluster has a very large volume of keys and/or limited capacity.
%% Only reduce below 16 in exceptional circumstances.
%% More capacity to process sync queries can be added by increasing the af2
%% and af3 queue sizes - but at the risk of a bigger impact on KV performance
%% when repairs are required.
{mapping,
 "ttaaefs_maxresults",
 "riak_kv.ttaaefs_maxresults",
 [
  {datatype, integer},
  {default, 32}
 ]}.

%% @doc For Tictac full-sync, the multiplier applied to ttaaefs_maxresults
%% when running a range_check query: the maximum number of AAE segments to be
%% compared per exchange will then be ttaaefs_maxresults * ttaaefs_rangeboost.
%% When using range_check a small maxresults can therefore be used, in effect
%% using the other *_check syncs as discovery queries (to find the range for
%% the range_check to do the heavy lifting).
{mapping,
 "ttaaefs_rangeboost",
 "riak_kv.ttaaefs_rangeboost",
 [
  {datatype, integer},
  {default, 16}
 ]}.

%% @doc For Tictac bucket full-sync, the name of the bucket to be sync'd by
%% this node. Only ASCII string bucket definitions are supported (they will
%% be converted using list_to_binary).
{mapping,
 "ttaaefs_bucketfilter_name",
 "riak_kv.ttaaefs_bucketfilter_name",
 [
  {datatype, string},
  {commented, "sample_bucketname"}
 ]}.

%% @doc For Tictac bucket full-sync, the bucket type of the bucket to be
%% sync'd. Only ASCII string bucket type definitions are supported (they
%% will be converted to binary using list_to_binary).
{mapping,
 "ttaaefs_bucketfilter_type",
 "riak_kv.ttaaefs_bucketfilter_type",
 [
  {datatype, string},
  {commented, "default"}
 ]}.

%% @doc For Tictac bucket-type full-sync, the bucket type to be sync'd.
%% Only ASCII string bucket type definitions are supported (they will be
%% converted to binary using list_to_binary).
%% TODO: Type-based filtering is not yet supported
{mapping,
 "ttaaefs_buckettype",
 "riak_kv.ttaaefs_buckettype",
 [
  {datatype, string}
 ]}.


%% @doc For Tictac all full-sync, the NVAL to be sync'd by this node.
%% This is the `local` nval, as the data in the remote cluster may have an
%% alternative nval.
{mapping,
 "ttaaefs_localnval",
 "riak_kv.ttaaefs_localnval",
 [
  {datatype, integer},
  {default, 3}
 ]}.

%% @doc For Tictac all full-sync, the NVAL to be sync'd in the remote
%% cluster.
{mapping,
 "ttaaefs_remotenval",
 "riak_kv.ttaaefs_remotenval",
 [
  {datatype, integer},
  {default, 3}
 ]}.

%% @doc The network address of the peer node to which this node will connect
%% for full-sync purposes. If that peer node is unavailable, this local node
%% will not perform any full-sync actions, so alternative peer addresses
%% should be configured on other nodes. The peer address may be a
%% load-balanced IP to avoid this issue.
{mapping,
 "ttaaefs_peerip",
 "riak_kv.ttaaefs_peerip",
 [
  {datatype, string},
  {commented, "127.0.0.1"},
  {validators, ["valid_ipaddr"]}
 ]}.

%% Validator accepting only strings that parse as an IP address.
%% Uses the documented inet:parse_address/1 (IPv4 or IPv6) rather than
%% reaching into the undocumented inet_parse module.
{validator,
  "valid_ipaddr",
  "must be a valid IP address",
  fun(AddrString) ->
    case inet:parse_address(AddrString) of
      {ok, _Addr} -> true;
      {error, _Reason} -> false
    end
  end}.

%% @doc The port to be used when connecting to the remote peer cluster.
{mapping,
 "ttaaefs_peerport",
 "riak_kv.ttaaefs_peerport",
 [
  {datatype, integer},
  {commented, 8087}
 ]}.

%% @doc The protocol to be used when connecting to the peer in the remote
%% cluster. Could be http or pb, with pb being relatively efficient. TLS
%% security can be added with the pb protocol by setting the following
%% configuration items:
%% - riak_kv, repl_cacert_filename
%% - riak_kv, repl_cert_filename
%% - riak_kv, repl_key_filename
%% - riak_kv, repl_username
{mapping,
 "ttaaefs_peerprotocol",
 "riak_kv.ttaaefs_peerprotocol",
 [
  {datatype, {enum, [http, pb]}},
  {default, pb}
 ]}.

%% @doc The file path of the CA certificate used to validate the SSL
%% connection to the remote peer.
{mapping,
 "repl_cacert_filename",
 "riak_kv.repl_cacert_filename",
 [
  {datatype, string}
 ]}.

%% @doc The file path of the certificate to be used by this node when acting
%% as a client for replication. The certificate must be trusted by the
%% cluster with security enabled.
{mapping,
 "repl_cert_filename",
 "riak_kv.repl_cert_filename",
 [
  {datatype, string}
 ]}.

%% @doc The file path of the key related to the certificate in
%% repl_cert_filename.
{mapping,
 "repl_key_filename",
 "riak_kv.repl_key_filename",
 [
  {datatype, string}
 ]}.

%% @doc When security is enabled on the remote cluster for replication, a
%% username is required to identify the replication client. This username
%% must be configured on the remote cluster with appropriate access rights.
{mapping,
 "repl_username",
 "riak_kv.repl_username",
 [
  {datatype, string}
 ]}.

%% @doc How many times per 24-hour period all of the data should be checked
%% to confirm it is fully sync'd. When running a full (i.e. nval) sync this
%% will check all the data under that nval between the clusters, and when the
%% trees are out of alignment, will check across all data where the nval
%% matches the specified nval.
%% On large clusters (in terms of key count) this may take a long time - so
%% allcheck should be scheduled infrequently, as other checks may be delayed
%% by the consumption of queue resource by the allcheck.
%% The af3_queue size and ttaaefs_maxresults both need to be tuned to ensure
%% that the allcheck can run within the 30 minute timeout.
%% For per-bucket replication, all is a reference to all of the data for that
%% bucket, and the warnings about sizing are especially relevant.
{mapping,
 "ttaaefs_allcheck",
 "riak_kv.ttaaefs_allcheck",
 [
  {datatype, integer},
  {default, 0}
 ]}.

%% @doc How many times per 24-hour period a check should be run in which no
%% data is checked. Use nochecks to align the number of checks done by each
%% node - if each node has the same number of slots, they will naturally
%% space their checks within the period of the slot.
{mapping,
 "ttaaefs_nocheck",
 "riak_kv.ttaaefs_nocheck",
 [
  {datatype, integer},
  {default, 0}
 ]}.

%% @doc How many times per 24-hour period the last hour's data should be
%% checked to confirm it is fully sync'd.
%% For per-bucket replication, the tree comparison prompted by this will be
%% constrained by the time period, as will the keys and clocks checked for
%% repair. For full (nval) replication the tree comparison is across all
%% time, but the keys and clocks checked for repair are constrained by the
%% time period.
%% Once deltas are outside of the last hour an hourcheck can do nothing to
%% resolve the data, but will still consume resource.
{mapping,
 "ttaaefs_hourcheck",
 "riak_kv.ttaaefs_hourcheck",
 [
  {datatype, integer},
  {default, 0}
 ]}.

%% @doc How many times per 24-hour period the last 24 hours of data should
%% be checked to confirm it is fully sync'd.
%% For per-bucket replication, the tree comparison prompted by this will be
%% constrained by the time period, as will the keys and clocks checked for
%% repair. For full (nval) replication the tree comparison is across all
%% time, but the keys and clocks checked for repair are constrained by the
%% time period.
%% Once deltas are outside of the last 24 hours a daycheck can do nothing to
%% resolve the data, but will still consume resource.
{mapping,
 "ttaaefs_daycheck",
 "riak_kv.ttaaefs_daycheck",
 [
  {datatype, integer},
  {default, 0}
 ]}.

%% @doc How many times per 24-hour period a range_check should be run. The
%% range_check is intended to be a smart check, in that it will:
%% - use a last_modified range starting from the last successful check as its
%% range, if the last check was successful (i.e. showed the clusters to be
%% in sync);
%% - use a range identified by the last check (a last modified range, and
%% perhaps also a specific Bucket), if a range to limit the issues has been
%% identified by a previous failure;
%% - not run at all if the clusters are out of sync and no range has been
%% discovered (this may be the case when running on a sink which is behind a
%% source cluster).
%% For full (nval) sync operations the range is only relevant to the search
%% for objects to repair - the tree comparison is always between all data for
%% that nval.
{mapping,
 "ttaaefs_rangecheck",
 "riak_kv.ttaaefs_rangecheck",
 [
  {datatype, integer},
  {default, 0}
 ]}.

%% @doc How many times per 24-hour period an autocheck should be run. The
%% autocheck is intended to be an adaptive check, in that it will:
%% - use a rangecheck if the previous check has identified a range, or;
%% - use a rangecheck if the previous check was a successful sync, using the
%% timestamp of the previous success as the low modified date, or;
%% - use a nocheck if the previous check did not sync, but also did not lead
%% to any repairs (e.g. because the sink was ahead), or;
%% - use an allcheck if no range is identified, and the current time is
%% inside the all_check window, or;
%% - use a daycheck otherwise.
%% For full (nval) sync operations the range is only relevant to the search
%% for objects to repair - the tree comparison is always between all data for
%% that nval.
{mapping,
 "ttaaefs_autocheck",
 "riak_kv.ttaaefs_autocheck",
 [
  {datatype, integer},
  {default, 24}
 ]}.

%% @doc Specifies when during the day an autocheck may run an allcheck rather
%% than the less expensive daycheck, when no range has been identified.
%% This restricts the fixing of entropy between clusters for data that has
%% not been recently modified to this window (and hence ensures the cost of
%% fixing this entropy does not fall in prime hours). Note this window
%% applies to `ttaaefs_autocheck`, which will adapt to the window - any
%% `ttaaefs_allcheck` count that has been configured will ignore the window:
%%
%% * `always` (default) No restrictions
%% * `never` never convert an auto_check to an all_check
%% * `window` Hours during which an all_check is permitted, where
%%   `ttaaefs_allcheck.window.start` and `ttaaefs_allcheck.window.end` are
%%   integers between 0 and 23.
%%
%% If all_check has a significant impact on the performance of your cluster,
%% or your cluster has quiet periods in which little storage activity
%% occurs, you may want to change this setting from the default.
{mapping,
 "ttaaefs_allcheck.policy",
 "riak_kv.ttaaefs_allcheck_window",
 [
  {datatype, {enum, [always, never, window]}},
  {default, always}
 ]}.

%% @doc The hour at which the all_check window starts. Only read when
%% `ttaaefs_allcheck.policy` is set to `window`.
%% @see ttaaefs_allcheck.policy
{mapping, "ttaaefs_allcheck.window.start", "riak_kv.ttaaefs_allcheck_window", [
  {default, 0},
  {datatype, integer},
  hidden
]}.

%% @doc The hour at which the all_check window ends. Only read when
%% `ttaaefs_allcheck.policy` is set to `window`.
%% @see ttaaefs_allcheck.policy
{mapping, "ttaaefs_allcheck.window.end", "riak_kv.ttaaefs_allcheck_window", [
  {default, 23},
  {datatype, integer},
  hidden
]}.

%% Derives riak_kv.ttaaefs_allcheck_window from ttaaefs_allcheck.policy:
%% the `window` policy yields {StartHour, EndHour} as read from
%% ttaaefs_allcheck.window.start and ttaaefs_allcheck.window.end, `never`
%% yields never, and any other policy yields always.
{translation,
 "riak_kv.ttaaefs_allcheck_window",
 fun(Conf) ->
   WindowHour =
     fun(Key) -> cuttlefish:conf_get(Key, Conf, undefined) end,
   case cuttlefish:conf_get("ttaaefs_allcheck.policy", Conf) of
     window ->
       StartHour = WindowHour("ttaaefs_allcheck.window.start"),
       EndHour = WindowHour("ttaaefs_allcheck.window.end"),
       {StartHour, EndHour};
     never ->
       never;
     _AlwaysOrOther ->
       always
   end
 end}.

%% @doc When Tictac AAE full-sync discovers keys to be repaired, should each
%% key that is repaired be logged.
{mapping,
 "ttaaefs_logrepairs",
 "riak_kv.ttaaefs_logrepairs",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  {commented, enabled}
 ]}.

%% @doc When Tictac AAE sees a difference in trees (for nval-based full
%% comparisons only), should it attempt to repair those trees as well as
%% repairing any deltas. Enabling this setting will change the concurrency
%% of the fetch_clock_nval queries run to find repairs.
{mapping,
 "aae_fetchclocks_repair",
 "riak_kv.aae_fetchclocks_repair",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  {commented, enabled}
 ]}.

%% @doc Enable this node to act as a source for real-time replication.
{mapping,
 "replrtq_enablesrc",
 "riak_kv.replrtq_enablesrc",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  {commented, enabled}
 ]}.

%% @doc Limit the number of objects to be cached on the replication queue.
%% Objects queued when the priority queue is beyond this limit are stored as
%% clocks only, to be fetched on replication.
{mapping,
 "replrtq_srcobjectlimit",
 "riak_kv.replrtq_srcobjectlimit",
 [
  {datatype, integer},
  {default, 1000}
 ]}.

%% @doc Limit the size of an object which may be pushed to the replication
%% queue. Objects larger than this will still be replicated, but by being
%% re-fetched. The product of replrtq_srcobjectsize and
%% replrtq_srcobjectlimit gives a theoretical maximum for the total memory
%% consumed by the riak_kv_replrtq (in terms of objects). The default of this
%% product is 200MB.
{mapping,
 "replrtq_srcobjectsize",
 "riak_kv.replrtq_srcobjectsize",
 [
  {datatype, bytesize},
  {default, "200KB"}
 ]}.

%% @doc Queue definitions.
%% Queues should be defined using a pipe '|' delimited string of queue
%% definitions, each made up of two colon ':' delimited elements. The first
%% part of each queue definition is the ASCII name of the queue; the second
%% part indicates the filter to be applied, which should be one of:
%% - any (all real-time modifications to be replicated via this queue)
%% - block_rtq (no real-time modifications to be replicated)
%% - bucketname.<name_of_bucket>
%% - bucketprefix.<prefix_for_bucket>
%% - buckettype.<name_of_type>
%% The latter three options allow the queue to support just specific
%% buckets, just buckets with certain prefixes, or just buckets of a given
%% type.
%% If a list of buckets or types needs to be supported, then either multiple
%% queues need to be defined, or non-persistent extended definitions can be
%% made at runtime using the riak_kv_replrtq_src API.
%% An example configuration might be:
%% cluster_a:any|cluster_b:block_rtq|cluster_c:bucketprefix.user
{mapping,
 "replrtq_srcqueue",
 "riak_kv.replrtq_srcqueue",
 [
  {datatype, string},
  {default, "q1_ttaaefs:block_rtq"}
 ]}.

%% @doc Enable this node to zlib compress objects over the wire.
{mapping,
 "replrtq_compressonwire",
 "riak_kv.replrtq_compressonwire",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  {commented, enabled}
 ]}.

%% @doc Enable this node to act as a sink, consuming from a src cluster.
{mapping,
 "replrtq_enablesink",
 "riak_kv.replrtq_enablesink",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  {commented, enabled}
 ]}.

%% @doc The queue name to be used for peers (replrtq_sinkpeers) that are
%% defined without a queue name. Each node is expected to have a single
%% queue from which it will consume (by name). This queue may be consumed
%% from multiple peers - and those peers may sit on multiple clusters.
%% If more than one queue name is to be consumed from, real-time changes can
%% be made through `riak_kv_replrtq_snk:add_snkqueue/3`. The peer list can
%% also be extended to add different queue names into definitions - however
%% it is strongly recommended to use a single sinkqueue name per node.
{mapping,
 "replrtq_sinkqueue",
 "riak_kv.replrtq_sinkqueue",
 [
  {datatype, atom},
  {default, q1_ttaaefs}
 ]}.

%% @doc The list of peers that informs the sink node how to reach the src.
%% All src nodes will need to have entries consumed - so it is recommended
%% that each src node is referred to in multiple sink node configurations.
%% The list of peers is tokenised as host:port:protocol
%% In exceptional circumstances this definition can be extended to
%% queuename:host:port:protocol - but restricting the definitions of
%% queuename to the single queue specified in replrtq_sinkqueue is strongly
%% recommended.
{mapping,
 "replrtq_sinkpeers",
 "riak_kv.replrtq_sinkpeers",
 [
  {datatype, string},
  {commented, "127.0.0.1:8087:pb"}
 ]}.

%% @doc The number of workers to be used for each queue. This must be
%% configured.
{mapping,
 "replrtq_sinkworkers",
 "riak_kv.replrtq_sinkworkers",
 [
  {datatype, integer},
  {default, 24}
 ]}.

%% @doc The maximum number of workers to be used for any given peer. If not
%% configured, this will default to the number of sinkworkers.
{mapping,
 "replrtq_sinkpeerlimit",
 "riak_kv.replrtq_sinkpeerlimit",
 [
  {datatype, integer},
  {commented, 24}
 ]}.

%% @doc Enable this node to perform peer discovery for real-time replication.
%% With peer discovery, the configured peers will be used to discover more
%% peers within those clusters. The peer discovery process will re-discover
%% at a random interval between 60s and `replrtq_prompt_max_seconds`.
%% Unexpected events will result in the configured peers being used, and the
%% discovered peers being ignored. If disabled, the peer discovery process
%% will be initialised, but will remain idle.
{mapping,
 "replrtq_peer_discovery",
 "riak_kv.replrtq_peer_discovery",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  {commented, enabled}
 ]}.

%% @doc The maximum number of seconds to wait before peer discovery is
%% re-prompted. Defaults to 15 minutes. An immediate cluster-wide update can
%% be prompted via `remote_console` using
%% `riak_client:replrtq_reset_all_peers/1`.
{mapping,
 "replrtq_prompt_max_seconds",
 "riak_kv.replrtq_prompt_max_seconds",
 [
  {datatype, integer},
  {commented, 900}
 ]}.

%% @doc Enable the `recalc` compaction strategy within the leveled backend in
%% riak. The default (when disabled) is `retain`, but this will leave
%% uncollected garbage within the journal.
%% From Riak KV 2.9.2 it is recommended to consider the `recalc` strategy.
%% This strategy has the side effect of slower startups, and slower recovery
%% from a wiped ledger - but it will not keep an overhead of garbage within
%% the journal.
%% It should be possible to move from `retain` to `recalc` via a
%% configuration change. However, it is not possible to switch from `recalc`
%% back to `retain`. This switch can only be made for new nodes receiving
%% data through riak transfers (not inheriting data on disk).
%% The default `retain` strategy retains a history of key changes in the
%% journal, whereas the `recalc` strategy discards that history, but will
%% redo a diff_index_specs calculation when reloading each object.
{mapping,
 "leveled_reload_recalc",
 "riak_kv.leveled_reload_recalc",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  {commented, enabled}
 ]}.

%% @doc Enable the logging of query timings in the index_fsm.
{mapping,
 "log_index_fsm",
 "riak_kv.log_index_fsm",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  {commented, enabled}
 ]}.

%% @doc Set the vnode worker pool size.
%% This is a per-vnode pool of workers, to be used for general queries, in
%% particular secondary index queries. It now defaults to 5 workers; prior
%% to release 3.0.9 the default was 10.
%% The number of concurrent index queries that can be supported in the
%% cluster will be equal to n_val * worker_count.
%% The statistic worker_vnode_pool_worktime_mean tracks the average time each
%% worker is taking per query in microseconds, so the overall queries per
%% second supported will be:
%%  (1000000 div worker_vnode_pool_worktime_mean) * n_val * worker_count
%% It should normally be possible to support >> 100 queries per second with
%% just a single worker per vnode.
%% The statistic worker_vnode_pool_queuetime_mean tracks the average time a
%% query is spending on a queue, should the vnode pool be exhausted.
%% If using tictac_aae this should be set to at least 2, as tree rebuilds use
%% this pool as well as queries. Also consider that long-running legacy
%% queries (list keys and list buckets, not using aae_fold) use this pool
%% too. All aae_fold type queries will use the alternative node_worker_pool,
%% unless none is used for the worker_pool_strategy, in which case the vnode
%% pool is also used for aae_folds.
{mapping,
 "worker_pool_size",
 "riak_kv.worker_pool_size",
 [
  {datatype, integer},
  {default, 5}
 ]}.


%% @doc Limit the size of replication queues (per queue and priority, i.e.
%% each priority on each queue will have this as its limit).
%% This option is ignored from Release 3.0.10, as the queue was converted to
%% an overflow queue as part of that release. Instead use
%% `replrtq_overflow_limit` to control the queue size, including the on-disk
%% size.
{mapping,
 "replrtq_srcqueuelimit",
 "riak_kv.replrtq_srcqueuelimit",
 [
  {datatype, integer},
  {default, 300000},
  hidden
 ]}.

%% @doc Choose to read repair to primary vnodes only.
%% When fallback vnodes are elected, read repair will by default repair any
%% data missing from the vnode - i.e. every GET while the fallback is in play
%% will lead to a PUT to add the requested object to the fallback vnode, as
%% the fallback by default starts empty.
%% If the expectation is that failed vnodes are replaced quickly, as would be
%% possible in a Cloud scenario, this may not be desirable. Read repair to
%% fallbacks reduces throughput in failure scenarios, and then the hinted
%% handoffs following recovery are impaired by the historic data which is
%% already in the recovered node, and has to be handed off as well as the
%% fresh updates received since the failure.
%% When fallback vnodes are expected to be in place for a long period, the
%% default setting of read repairing fallbacks may be preferred, as it will
%% provide additional data resilience, and potentially improved performance
%% where the same objects are repeatedly fetched.
{mapping,
 "read_repair_primaryonly",
 "riak_kv.read_repair_primaryonly",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled}
 ]}.

%% @doc When reads discover keys to be repaired, should each key that is
%% repaired be logged.
{mapping,
 "read_repair_log",
 "riak_kv.read_repair_log",
 [
  {datatype, {flag, enabled, disabled}},
  {default, disabled},
  hidden
 ]}.