Skip to content

Commit

Permalink
Add doubled buckets issue
Browse files Browse the repository at this point in the history
  • Loading branch information
yngvar-antonsson committed Nov 21, 2024
1 parent 32ff9f0 commit 930dd54
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 0 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ Changed
- Update ``vshard`` dependency to `0.1.30 <https://github.com/tarantool/vshard/releases/tag/0.1.30>`_.
- Update ``http`` dependency to `1.7.0 <https://github.com/tarantool/http/releases/tag/1.7.0>`_.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Added
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- New issue about doubled buckets (can be enabled with ``TARANTOOL_CHECK_DOUBLED_BUCKETS=true``).

-------------------------------------------------------------------------------
[2.12.4] - 2024-09-16
-------------------------------------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions cartridge.lua
Original file line number Diff line number Diff line change
Expand Up @@ -874,13 +874,16 @@ local function cfg(opts, box_opts)

local res, err = argparse.get_opts({
disable_unrecoverable_instances = 'boolean',
check_doubled_buckets = 'boolean',
check_doubled_buckets_period = 'number',
})

if err ~= nil then
return nil, err
end

issues.disable_unrecoverable(res.disable_unrecoverable_instances)
issues.check_doubled_buckets(res.check_doubled_buckets, res.check_doubled_buckets_period)

if opts.upload_prefix ~= nil then
local path = opts.upload_prefix
Expand Down
35 changes: 35 additions & 0 deletions cartridge/issues.lua
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
-- * various vshard alerts (see vshard docs for details);
-- * warning: "Group "..." wasn't bootstrapped: ...";
-- * warning: Vshard storages in replicaset %s marked as "all writable".
-- * warning: "Cluster has ... doubled buckets. Call require('cartridge.vshard-utils').find_doubled_buckets() for details";
-- You can enable extra vshard issues by setting
-- `TARANTOOL_ADD_VSHARD_STORAGE_ALERTS_TO_ISSUES=true/TARANTOOL_ADD_VSHARD_ROUTER_ALERTS_TO_ISSUES=true`
-- or with `--add-vshard-storage-alerts-to-issues/--add-vshard-router-alerts-to-issues` command-line argument.
Expand Down Expand Up @@ -125,6 +126,7 @@ local lua_api_proxy = require('cartridge.lua-api.proxy')
local lua_api_topology = require('cartridge.lua-api.topology')
local invalid_format = require('cartridge.invalid-format')
local sync_spaces = require('cartridge.sync-spaces')
local vshard_utils = require('cartridge.vshard-utils')

local ValidateConfigError = errors.new_class('ValidateConfigError')

Expand Down Expand Up @@ -154,6 +156,9 @@ local limits_ranges = {

vars:new('limits', default_limits)
vars:new('disable_unrecoverable', false)
vars:new('check_doubled_buckets', false)
vars:new('check_doubled_buckets_period', 24*60*60) -- 24 hours

vars:new('instance_uuid')
vars:new('replicaset_uuid')

Expand Down Expand Up @@ -565,6 +570,8 @@ local function list_on_instance(opts)
end

local disk_failure_cache = {}
local doubled_buckets_count_cache = 0
local last_doubled_buckets_check = fiber.time()
local function list_on_cluster()
local state, err = confapplier.get_state()
if state == 'Unconfigured' and lua_api_proxy.can_call() then
Expand Down Expand Up @@ -746,6 +753,28 @@ local function list_on_cluster()
end
end

-- Re-scan the cluster for doubled buckets only when the feature is enabled
-- and at least `check_doubled_buckets_period` seconds have passed since the
-- previous scan. The scan calls every replicaset, so between scans the
-- cached counter is reused.
if vars.check_doubled_buckets == true
and fiber.time() - last_doubled_buckets_check >= vars.check_doubled_buckets_period
then
    -- find_doubled_buckets() may return false or nil; treat both as
    -- "nothing found".
    local doubled_buckets = vshard_utils.find_doubled_buckets() or {}
    doubled_buckets_count_cache = 0
    -- The result is a map keyed by bucket id, so `#` cannot be used here.
    for _ in pairs(doubled_buckets) do
        doubled_buckets_count_cache = doubled_buckets_count_cache + 1
    end
    last_doubled_buckets_check = fiber.time()
end

-- Report the cached number of doubled buckets as a cluster-wide warning
-- in the 'vshard' topic; nothing is added while the counter is zero.
if doubled_buckets_count_cache > 0 then
table.insert(ret, {
level = 'warning',
topic = 'vshard',
message = string.format(
"Cluster has %d doubled buckets. " ..
"Call require('cartridge.vshard-utils').find_doubled_buckets() for details",
doubled_buckets_count_cache
)
})
end
-- Get each instance issues (replication, failover, memory usage)

local twophase_vars = require('cartridge.vars').new('cartridge.twophase')
Expand Down Expand Up @@ -859,4 +888,10 @@ return {
-- Store the `disable` flag in `vars.disable_unrecoverable`.
disable_unrecoverable = function(disable)
vars.disable_unrecoverable = disable
end,
-- Configure the doubled buckets check.
-- `check` is stored as is in `vars.check_doubled_buckets`;
-- `period` overrides `vars.check_doubled_buckets_period` only when given,
-- otherwise the current value is kept.
check_doubled_buckets = function(check, period)
    if period ~= nil then
        vars.check_doubled_buckets_period = period
    end
    vars.check_doubled_buckets = check
end,
}
45 changes: 45 additions & 0 deletions cartridge/vshard-utils.lua
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,50 @@ local function can_bootstrap_group(group_name, vsgroup)
return true
end

-- see https://github.com/tarantool/vshard/issues/412 for details
--- Find buckets that are reported by more than one replicaset.
--
-- Calls `vshard.storage.buckets_info` on every replicaset returned by
-- `vshard.router.routeall()` and groups the answers by bucket id.
--
-- @function find_doubled_buckets
-- @treturn[1] table Map `bucket_id -> {count, info, uuids}` holding only the
--   buckets reported by more than one replicaset (empty if there are none)
-- @treturn[2] boolean `false` if `roles.get_role('vshard-router')` is nil
-- @return[3] nil
-- @return[3] Error returned by the failed replicaset call
local function find_doubled_buckets()
    if roles.get_role('vshard-router') == nil then
        return false
    end
    local vshard = require('vshard')

    -- Entries are created lazily, on the first report of a bucket id.
    -- This avoids allocating three tables per bucket for the whole bucket
    -- range on every scan, and tolerates ids the router does not expect.
    local seen = {}
    for _, replicaset in pairs(vshard.router.routeall()) do
        local buckets, err = replicaset:callro(
            'vshard.storage.buckets_info', {}, {timeout = 5}
        )
        -- A missing payload is treated as a failure too, so that the
        -- loop below never iterates over nil.
        if err or buckets == nil then
            return nil, err
        end

        for id, bucket in pairs(buckets) do
            local entry = seen[id]
            if entry == nil then
                entry = {count = 0, info = {}, uuids = {}}
                seen[id] = entry
            end
            entry.count = entry.count + 1
            table.insert(entry.uuids, replicaset.uuid)
            table.insert(entry.info, bucket)
        end
    end

    -- Keep only the buckets seen on more than one replicaset.
    local intersection = {}
    for id, entry in pairs(seen) do
        if entry.count > 1 then
            intersection[id] = entry
        end
    end

    return intersection
end


local function can_bootstrap()
if roles.get_role('vshard-router') == nil then
return false
Expand Down Expand Up @@ -764,6 +808,7 @@ return {
can_bootstrap = can_bootstrap,
edit_vshard_options = edit_vshard_options,
patch_zone_distances = patch_zone_distances,
find_doubled_buckets = find_doubled_buckets,

init = init,
}
8 changes: 8 additions & 0 deletions rst/cartridge_admin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,14 @@ Cartridge displays cluster and instances issues in WebUI:
* **warning**: "Vshard storages in replicaset ... marked as "all writable".
You can fix it by setting ``all_rw = false`` in the replicaset configuration;

* **warning**: "Cluster has ... doubled buckets. Call require('cartridge.vshard-utils').find_doubled_buckets() for details"
-- you need to call ``require('cartridge.vshard-utils').find_doubled_buckets()`` to get more info
and then remove all duplicated data manually and then use ``vshard.storage.bucket_force_drop(bucket_id)``
to remove the bucket. See https://github.com/tarantool/vshard/issues/412 for details.
This issue is disabled by default. You can enable it by setting
``TARANTOOL_CHECK_DOUBLED_BUCKETS=true``; the check will then run once every
``TARANTOOL_CHECK_DOUBLED_BUCKETS_PERIOD`` seconds (default is 24 hours);

You can enable extra vshard issues by setting
``TARANTOOL_ADD_VSHARD_STORAGE_ALERTS_TO_ISSUES=true/TARANTOOL_ADD_VSHARD_ROUTER_ALERTS_TO_ISSUES=true``
or with ``--add-vshard-storage-alerts-to-issues/--add-vshard-router-alerts-to-issues`` command-line argument.
Expand Down
71 changes: 71 additions & 0 deletions test/integration/vshard_doubled_buckets_test.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
local fio = require('fio')
local t = require('luatest')
local g = t.group()

local helpers = require('test.helper')

-- Start a cluster with one router and two single-instance storages,
-- with the doubled buckets check enabled and a 10 second period.
g.before_all = function()
    -- Every replicaset in this test has exactly one server and one role.
    local function single_server_replicaset(alias, role)
        return {
            alias = alias,
            roles = {role},
            servers = 1,
        }
    end

    local cluster_opts = {
        datadir = fio.tempdir(),
        server_command = helpers.entrypoint('srv_basic'),
        cookie = helpers.random_cookie(),
        use_vshard = true,
        replicasets = {
            single_server_replicaset('router', 'vshard-router'),
            single_server_replicaset('storage-1', 'vshard-storage'),
            single_server_replicaset('storage-2', 'vshard-storage'),
        },
        env = {
            TARANTOOL_CHECK_DOUBLED_BUCKETS = 'true',
            TARANTOOL_CHECK_DOUBLED_BUCKETS_PERIOD = '10',
        },
    }

    g.cluster = helpers.Cluster:new(cluster_opts)
    g.cluster:start()
end

-- Stop the cluster and remove its data directory.
g.after_all = function()
    local cluster = g.cluster
    cluster:stop()
    fio.rmtree(cluster.datadir)
end

-- Check that a bucket present on two storages produces the "doubled buckets"
-- warning, and that the warning goes away once the duplicate is removed.
function g.test_doubled_buckets()
    local storage_1 = g.cluster:server('storage-1-1')
    local storage_2 = g.cluster:server('storage-2-1')

    -- Take any bucket record owned by storage-2 ...
    local bucket = storage_2:exec(function()
        return box.space._bucket:select(nil, {limit = 1})[1]
    end)

    -- ... and insert the same record into storage-1 with the _bucket
    -- triggers switched off.
    storage_1:exec(function(bucket)
        box.space._bucket:run_triggers(false)
        return box.space._bucket:insert(bucket)
    end, {bucket})

    t.helpers.retrying({timeout = 20}, function()
        t.assert_covers(helpers.list_cluster_issues(g.cluster.main_server), {
            {
                level = 'warning',
                topic = 'vshard',
                message = "Cluster has 1 doubled buckets. " ..
                    "Call require('cartridge.vshard-utils').find_doubled_buckets() for details",
            },
        })
    end)

    storage_1:exec(function(bucket)
        return box.space._bucket:delete(bucket[1])
    end, {bucket})

    -- `assert_covers(issues, {})` holds for any list of issues, so it cannot
    -- detect a warning that never goes away. Check explicitly that no
    -- doubled buckets issue is reported any more.
    t.helpers.retrying({timeout = 20}, function()
        local issues = helpers.list_cluster_issues(g.cluster.main_server)
        for _, issue in ipairs(issues) do
            t.assert_not_str_contains(issue.message, 'doubled buckets')
        end
    end)
end

0 comments on commit 930dd54

Please sign in to comment.