Skip to content
This repository has been archived by the owner on Jun 23, 2022. It is now read-only.

feat(split): supplement two cases after implementing cancel split #742

Merged
merged 1 commit into from
Feb 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions src/meta/meta_split_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ void meta_split_service::do_start_partition_split(std::shared_ptr<app_state> app
_state->get_app_path(*app), std::move(value), on_write_storage_complete);
}

// TODO(heyuchen): refactor this function
void meta_split_service::register_child_on_meta(register_child_rpc rpc)
{
const auto &request = rpc.request();
Expand Down Expand Up @@ -166,7 +165,16 @@ void meta_split_service::register_child_on_meta(register_child_rpc rpc)
return;
}

// TODO(heyuchen): pause/cancel split check
if (child_gpid.get_partition_index() >= app->partition_count) {
derror_f(
"app({}) partition({}) register child({}) failed, partition split has been canceled",
app_name,
parent_gpid,
child_gpid);
response.err = ERR_INVALID_STATE;
response.parent_config = parent_config;
return;
}

auto iter = app->helpers->split_states.status.find(parent_gpid.get_partition_index());
if (iter == app->helpers->split_states.status.end()) {
Expand Down
25 changes: 18 additions & 7 deletions src/meta/server_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -834,13 +834,24 @@ void server_state::on_config_sync(configuration_query_by_node_rpc rpc)
rep.pid.get_partition_index());
std::shared_ptr<app_state> app = get_app(rep.pid.get_app_id());
if (app == nullptr || rep.pid.get_partition_index() >= app->partition_count) {
// app is not recognized or partition is not recognized
dassert(false,
"gpid(%d.%d) on node(%s) is not exist on meta server, administrator "
"should check consistency of meta data",
rep.pid.get_app_id(),
rep.pid.get_partition_index(),
request.node.to_string());
// This app has garbage partition after cancel split, the canceled child
// partition should be gc
if (app != nullptr &&
rep.pid.get_partition_index() < app->partition_count * 2 &&
rep.status == partition_status::PS_ERROR) {
response.gc_replicas.push_back(rep);
dwarn_f("notify node({}) to gc replica({}) because it is useless partition "
"which is caused by cancel split",
request.node.to_string(),
rep.pid);
} else {
// app is not recognized or partition is not recognized
dassert(false,
"gpid({}) on node({}) is not exist on meta server, administrator "
"should check consistency of meta data",
rep.pid,
request.node.to_string());
}
Comment on lines 836 to +854
Copy link
Contributor

@neverchanje neverchanje Feb 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are too many levels of nesting here — you should have sensed a very bad smell in this code. Try to extract this big `if` into a separate function.

// check_partition_index_validity returns false if the replica should be GCed.
bool check_partition_index_validity(const shared_ptr<app_state>& app, gpid request_pid) {
  bool should_assert=false;
  if (app == nullptr) {
     should_assert=true;
  } else if (rep.status != partition_status::PS_ERROR && rep.pid.get_partition_index() >= app->partition_count * 2) {
    should_assert=true;
  }
  if (should_assert) {
    dassert(false);
  }
}

if (!check_partition_index_validity( req.pid)) {
  response.gc_replicas.push_back(rep);
}

} else if (app->status == app_status::AS_DROPPED) {
if (app->expire_second == 0) {
ddebug("gpid(%d.%d) on node(%s) is of dropped table, but expire second is "
Expand Down
11 changes: 6 additions & 5 deletions src/meta/test/meta_split_service_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ TEST_F(meta_split_service_test, register_child_test)
// Test case:
// - request is out-dated
// - child has been registered
// - TODO(heyuchen): parent partition has been paused splitting
// - parent partition has been paused splitting
// - parent partition is sync config to remote storage
// - register child succeed
struct register_test
Expand All @@ -285,6 +285,7 @@ TEST_F(meta_split_service_test, register_child_test)
} tests[] = {
{PARENT_BALLOT - 1, false, false, false, ERR_INVALID_VERSION, false},
{PARENT_BALLOT, true, false, false, ERR_CHILD_REGISTERED, false},
{PARENT_BALLOT, false, true, false, ERR_INVALID_STATE, false},
{PARENT_BALLOT, false, false, true, ERR_IO_PENDING, false},
{PARENT_BALLOT, false, false, false, ERR_OK, true},
};
Expand All @@ -295,7 +296,7 @@ TEST_F(meta_split_service_test, register_child_test)
mock_child_registered();
}
if (test.mock_parent_paused) {
// TODO(heyuchen): mock split paused
mock_split_states(split_status::PAUSED, PARENT_INDEX);
}
if (test.mock_pending) {
app->helpers->contexts[PARENT_INDEX].stage = config_status::pending_remote_sync;
Expand Down Expand Up @@ -330,21 +331,21 @@ TEST_F(meta_split_service_test, on_config_sync_test)
// Test case:
// - partition is splitting
// - partition is not splitting
// - TODO(heyuchen): partition split is paused({false, true, 1})
// - partition split is paused
struct config_sync_test
{
bool mock_child_registered;
bool mock_parent_paused;
int32_t expected_count;
} tests[] = {{false, false, 1}, {true, false, 0}};
} tests[] = {{false, false, 1}, {true, false, 0}, {false, true, 1}};

for (const auto &test : tests) {
mock_app_partition_split_context();
if (test.mock_child_registered) {
mock_child_registered();
}
if (test.mock_parent_paused) {
// TODO(heyuchen): TBD
mock_split_states(split_status::PAUSED, PARENT_INDEX);
}
ASSERT_EQ(on_config_sync(req), test.expected_count);
}
Expand Down
1 change: 1 addition & 0 deletions src/replica/split/replica_split_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,7 @@ void replica_split_manager::on_register_child_on_meta_reply(

// update primary parent group partition_count
update_local_partition_count(_replica->_app_info.partition_count * 2);
_meta_split_status = split_status::NOT_SPLIT;
_replica->broadcast_group_check();

parent_cleanup_split_context();
Expand Down