-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Integrate CRUD statistics with metrics
If `metrics` [1] is found, you can use metrics collectors to store statistics. `metrics >= 0.9.0` is required to support age buckets in summary and to include crucial bugfixes under high load [2]. The metrics are part of the global registry and can be exported together (e.g. to Prometheus) with default tools without any additional configuration. Disabling stats destroys the collectors. Local collectors are used by default. To use the metrics driver, call `crud.enable_stats{ driver = 'metrics' }`. Be wary that using metrics collectors may reduce overall performance: running them with the existing perf tests has shown rps dropping by a factor of 2-3. Raising quantile tolerance may result in even more severe performance drops. If `metrics` is used, `latency` statistics are changed to the 0.99 quantile of request execution time (with aging). Add CI matrix to run tests with `metrics` installed. 1. https://github.com/tarantool/metrics 2. tarantool/metrics#235 Closes #224
- Loading branch information
1 parent
8545818
commit 80c3fde
Showing
8 changed files
with
702 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
local is_package, metrics = pcall(require, 'metrics') | ||
|
||
local label = require('crud.stats.label') | ||
local dev_checks = require('crud.common.dev_checks') | ||
local registry_common = require('crud.stats.registry_common') | ||
|
||
local registry = {} | ||
local _registry = {} | ||
|
||
-- Names under which the collectors of this registry are created
-- in `metrics`.
local metric_name = {
    -- Counter collectors holding select/pairs details.
    map_reduces = 'tnt_crud_map_reduces',
    tuples_lookup = 'tnt_crud_tuples_lookup',
    tuples_fetched = 'tnt_crud_tuples_fetched',

    -- A single summary collector covers every operation.
    op = 'tnt_crud_stats',
    -- The summary collector creates the `*_count` and `*_sum`
    -- observations by itself; their names are listed here so that
    -- collected observations can be matched against them.
    op_sum = 'tnt_crud_stats_sum',
    op_count = 'tnt_crud_stats_count',
}

-- Quantile of request execution time reported as `latency`.
local LATENCY_QUANTILE = 0.99

-- Quantiles requested from the summary collector; the value set for
-- each quantile is its tolerance.
local DEFAULT_QUANTILES = {
    [LATENCY_QUANTILE] = 0.01,
}

-- Aging settings handed to the summary collector in `registry.init`.
local DEFAULT_SUMMARY_PARAMS = {
    max_age_time = 60,
    age_buckets_count = 2,
}
|
||
--- Tell whether the installed `metrics` rock can back this registry
--
-- Summary collectors with age buckets require `metrics >= 0.9.0`.
-- Versions `>= 0.5.0, < 0.9.0` are not supported because of the
-- quantile overflow bug
-- (https://github.com/tarantool/metrics/issues/235).
--
-- @function is_supported
--
-- @treturn boolean true if `metrics >= 0.9.0` is found, false otherwise.
--
function registry.is_supported()
    -- The `metrics` rock could not be required at all.
    if not is_package then
        return false
    end

    -- A summary collector module exposing `rotate_age_buckets`
    -- is treated as the sign of `metrics >= 0.9.0`.
    local loaded, summary_collector = pcall(require, 'metrics.collectors.summary')
    return loaded and summary_collector.rotate_age_buckets ~= nil
end
|
||
|
||
--- Create the CRUD collectors in the global metrics registry
--
-- One summary collector is created for operation calls and three
-- counter collectors for select/pairs details. Each collector is
-- remembered in the module-local registry under its metric name.
--
-- @function init
--
-- @treturn boolean Returns true.
--
function registry.init()
    local summary_name = metric_name.op
    _registry[summary_name] = metrics.summary(
        summary_name,
        'CRUD router calls statistics',
        DEFAULT_QUANTILES,
        DEFAULT_SUMMARY_PARAMS)

    -- { collector name, help string } pairs, in creation order.
    local counters = {
        { metric_name.tuples_fetched,
          'Tuples fetched from CRUD storages during select/pairs' },
        { metric_name.tuples_lookup,
          'Tuples looked up on CRUD storages while collecting response during select/pairs' },
        { metric_name.map_reduces,
          'Map reduces planned during CRUD select/pairs' },
    }

    for _, spec in ipairs(counters) do
        local name, help = spec[1], spec[2]
        _registry[name] = metrics.counter(name, help)
    end

    return true
end
|
||
--- Remove the CRUD collectors from the global metrics registry
--
-- Every collector remembered by this module is unregistered and
-- the module-local registry is replaced with an empty table.
--
-- @function destroy
--
-- @treturn boolean Returns true.
--
function registry.destroy()
    for _, collector in pairs(_registry) do
        metrics.registry:unregister(collector)
    end

    -- Forget the unregistered collectors.
    _registry = {}

    return true
end
|
||
--- Build a statistics table from the current collector values
--
-- The result has one entry per operation label. Entries are first
-- created by `registry_common.build_collector` and then overwritten
-- with the values observed by the collectors, if any.
--
-- @function get
--
-- @treturn table Returns copy of metrics registry.
function registry.get()
    local stats = {}

    -- Start with a freshly built collector for every known operation.
    for _, op_label in pairs(label) do
        stats[op_label] = registry_common.build_collector(op_label)
    end

    -- The summary collector yields quantile, `*_sum` and `*_count`
    -- observations; each of them maps to one field of the result.
    for _, obs in ipairs(_registry[metric_name.op]:collect()) do
        local labels = obs.label_pairs
        local operation = labels.operation
        local status = labels.status
        local name = obs.metric_name

        local field
        if name == metric_name.op then
            -- Only the latency quantile is exposed.
            if labels.quantile == LATENCY_QUANTILE then
                field = 'latency'
            end
        elseif name == metric_name.op_sum then
            field = 'time'
        elseif name == metric_name.op_count then
            field = 'count'
        end

        if field ~= nil then
            stats[operation][status][field] = obs.value
        end
    end

    -- Select/pairs details: take the first observation of each
    -- counter, if the counter has one.
    local details = { 'tuples_fetched', 'tuples_lookup', 'map_reduces' }
    for _, detail in ipairs(details) do
        local _, detail_obs = next(_registry[metric_name[detail]]:collect())
        if detail_obs ~= nil then
            stats[label.SELECT].details[detail] = detail_obs.value
        end
    end

    return stats
end
|
||
--- Increase requests count and update latency info
--
-- Passes the call execution time to the summary collector, labelled
-- with the operation and its outcome.
--
-- @function observe
--
-- @tparam string op_label
--  Label of registry collectors.
--  `registry.get` expects it to be one of the values
--  of `require('crud.stats.label')`.
--
-- @tparam boolean success
--  true if no errors on execution, false otherwise.
--
-- @tparam number latency
--  Time of call execution.
--
-- @treturn boolean Returns true.
--
function registry.observe(op_label, success, latency)
    dev_checks('string', 'boolean', 'number')

    local label_pairs = {
        operation = op_label,
        -- Anything but `true` is reported as an error.
        status = (success == true) and 'ok' or 'error',
    }

    _registry[metric_name.op]:observe(latency, label_pairs)

    return true
end
|
||
--- Account tuples processed by storage select/pairs calls
--
-- Both counters are increased under the select operation label.
--
-- @function observe_fetch
--
-- @tparam number tuples_fetched
--  Count of tuples fetched during storage call.
--
-- @tparam number tuples_lookup
--  Count of tuples looked up on storages while collecting response.
--
-- @treturn boolean Returns true.
--
function registry.observe_fetch(tuples_fetched, tuples_lookup)
    dev_checks('number', 'number')

    local select_labels = { operation = label.SELECT }

    local fetched_counter = _registry[metric_name.tuples_fetched]
    fetched_counter:inc(tuples_fetched, select_labels)

    local lookup_counter = _registry[metric_name.tuples_lookup]
    lookup_counter:inc(tuples_lookup, select_labels)

    return true
end
|
||
--- Account map reduces planned during select/pairs
--
-- The counter is increased under the select operation label.
--
-- @function observe_map_reduces
--
-- @tparam number count
--  Count of map reduces planned.
--
-- @treturn boolean Returns true.
--
function registry.observe_map_reduces(count)
    dev_checks('number')

    local map_reduces_counter = _registry[metric_name.map_reduces]
    map_reduces_counter:inc(count, { operation = label.SELECT })

    return true
end
|
||
return registry |
Oops, something went wrong.