Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
1. Never transition from s_donor directly to s_synced, always wait
   for SYNCED event as expected.
2. Fix transition to `s_joined` only after we have a complete state.
   Complete state is reached in the following 3 cases:
   - SST seqno exceeds connected seqno
   - view seqno equals connected seqno (view processed == view connected)
   - current state is `s_donor`

Refs #175
  • Loading branch information
ayurchen committed Nov 30, 2021
1 parent 31a35bf commit 8f59e7b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 45 deletions.
4 changes: 4 additions & 0 deletions src/reporter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ wsrep::reporter::substate_map(enum wsrep::server_state::state const state)
return s_disconnected_initializing;
else if (s_joining_sst == state_)
return s_joining_initializing;
else if (s_joining_initializing == state_)
return s_joining_initializing; // continuation
else
{
assert(0);
Expand All @@ -87,6 +89,8 @@ wsrep::reporter::substate_map(enum wsrep::server_state::state const state)
return s_disconnected_initialized;
else if (s_joining_initializing == state_)
return s_joining_ist;
else if (s_joining_ist == state_)
return s_joining_ist; // continuation
else
{
assert(0);
Expand Down
64 changes: 19 additions & 45 deletions src/server_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -668,8 +668,10 @@ int wsrep::server_state::start_sst(const std::string& sst_request,
if (server_service_.start_sst(sst_request, gtid, bypass))
{
lock.lock();
wsrep::log_warning() << "SST start failed";
state(lock, s_synced);
wsrep::log_warning() << "SST preparation failed";
// v26 API does not have JOINED event, so in anticipation of SYNCED
// we must do it here.
state(lock, s_joined);
ret = 1;
}
return ret;
Expand All @@ -683,6 +685,8 @@ void wsrep::server_state::sst_sent(const wsrep::gtid& gtid, int error)
wsrep::log_info() << "SST sending failed: " << error;

wsrep::unique_lock<wsrep::mutex> lock(mutex_);
// v26 API does not have JOINED event, so in anticipation of SYNCED
// we must do it here.
state(lock, s_joined);
lock.unlock();
enum provider::status const retval(provider().sst_sent(gtid, error));
Expand Down Expand Up @@ -719,7 +723,6 @@ void wsrep::server_state::sst_received(wsrep::client_service& cs,
assert(init_initialized_);
}
}
state(lock, s_joined);
lock.unlock();

if (id_.is_undefined())
Expand Down Expand Up @@ -893,7 +896,7 @@ void wsrep::server_state::on_connect(const wsrep::view& view)
}

void wsrep::server_state::on_primary_view(
const wsrep::view& view WSREP_UNUSED,
const wsrep::view& view,
wsrep::high_priority_service* high_priority_service)
{
wsrep::unique_lock<wsrep::mutex> lock(mutex_);
Expand Down Expand Up @@ -930,30 +933,16 @@ void wsrep::server_state::on_primary_view(
// If server side has already been initialized,
// skip directly to s_joined.
state(lock, s_initialized);
state(lock, s_joined);
}
}
else if (state_ == s_joiner)
{
// Got partitioned from the cluster, got IST and
// started applying actions.
state(lock, s_joined);
}
}
else
{
if (state_ == s_connected)
{
state(lock, s_joiner);
}
if (init_initialized_ && state_ != s_joined)
{
// If server side has already been initialized,
// skip directly to s_joined.
state(lock, s_joined);
}
}

if (init_initialized_ == false)
{
lock.unlock();
Expand All @@ -978,27 +967,11 @@ void wsrep::server_state::on_primary_view(
close_orphaned_sr_transactions(lock, *high_priority_service);
}

if (server_service_.sst_before_init())
{
if (state_ == s_initialized)
{
state(lock, s_joined);
if (init_synced_)
{
state(lock, s_synced);
}
}
}
else
if (state(lock) < s_joined &&
view.state_id().seqno() >= connected_gtid().seqno())
{
if (state_ == s_joiner)
{
state(lock, s_joined);
if (init_synced_)
{
state(lock, s_synced);
}
}
// If we reached or progressed beyond the connected seqno, we have the full state
state(lock, s_joined);
}
}

Expand Down Expand Up @@ -1079,13 +1052,14 @@ void wsrep::server_state::on_sync()
{
case s_synced:
break;
case s_connected:
state(lock, s_joiner);
WSREP_FALLTHROUGH;
case s_joiner:
state(lock, s_initializing);
case s_connected: // Seed node path: provider becomes
state(lock, s_joiner); // synced with itself before anything
WSREP_FALLTHROUGH; // else. Then goes DB initialization.
case s_joiner: // |
state(lock, s_initializing); // V
break;
case s_donor:
assert(false); // this should never happen
state(lock, s_joined);
state(lock, s_synced);
break;
Expand Down Expand Up @@ -1352,14 +1326,14 @@ void wsrep::server_state::state(
assert(lock.owns_lock());
static const char allowed[n_states_][n_states_] =
{
/* dis, ing, ized, cted, jer, jed, dor, sed, ding */
/* dis, ing, ized, cted, jer, jed, dor, sed, ding to/from */
{ 0, 1, 0, 1, 0, 0, 0, 0, 0}, /* dis */
{ 1, 0, 1, 0, 0, 0, 0, 0, 1}, /* ing */
{ 1, 0, 0, 1, 0, 1, 0, 0, 1}, /* ized */
{ 1, 0, 0, 1, 1, 0, 0, 1, 1}, /* cted */
{ 1, 1, 0, 0, 0, 1, 0, 0, 1}, /* jer */
{ 1, 0, 0, 1, 0, 0, 1, 1, 1}, /* jed */
{ 1, 0, 0, 1, 0, 1, 0, 1, 1}, /* dor */
{ 1, 0, 0, 1, 0, 1, 0, 0, 1}, /* dor */
{ 1, 0, 0, 1, 0, 1, 1, 0, 1}, /* sed */
{ 1, 0, 0, 0, 0, 0, 0, 0, 0} /* ding */
};
Expand Down

0 comments on commit 8f59e7b

Please sign in to comment.