Skip to content

Commit

Permalink
1. Never transition from s_donor directly to s_synced, always wait
Browse files Browse the repository at this point in the history
   for SYNCED event as expected.
2. Fix transition to `s_joined` only after we have a complete state.
   Conditions for `s_joined` are either:
   - SST seqno reaches or exceeds connected seqno
   - committed seqno reaches connected seqno
   - current state is `s_donor`

Refs #175
  • Loading branch information
ayurchen committed Nov 28, 2021
1 parent 3f79d43 commit 714a4bc
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 10 deletions.
11 changes: 11 additions & 0 deletions include/wsrep/seqno.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,17 @@ namespace wsrep
{
return (seqno_ > other.seqno_);
}

/** Return true if this seqno is not greater than the other one. */
bool operator<=(seqno other) const
{
    // Expressed as the negation of the strict "greater than" comparison.
    bool const greater(seqno_ > other.seqno_);
    return !greater;
}

/** Return true if this seqno is not less than the other one. */
bool operator>=(seqno other) const
{
    // Expressed as the negation of the strict "less than" comparison.
    bool const less(seqno_ < other.seqno_);
    return !less;
}

bool operator==(seqno other) const
{
return (seqno_ == other.seqno_);
Expand Down
47 changes: 37 additions & 10 deletions src/server_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -668,8 +668,10 @@ int wsrep::server_state::start_sst(const std::string& sst_request,
if (server_service_.start_sst(sst_request, gtid, bypass))
{
lock.lock();
wsrep::log_warning() << "SST start failed";
state(lock, s_synced);
wsrep::log_warning() << "SST preparation failed";
// v26 API does not have JOINED event, so in anticipation of SYNCED
// we must do it here.
state(lock, s_joined);
ret = 1;
}
return ret;
Expand All @@ -683,6 +685,8 @@ void wsrep::server_state::sst_sent(const wsrep::gtid& gtid, int error)
wsrep::log_info() << "SST sending failed: " << error;

wsrep::unique_lock<wsrep::mutex> lock(mutex_);
// v26 API does not have JOINED event, so in anticipation of SYNCED
// we must do it here.
state(lock, s_joined);
lock.unlock();
enum provider::status const retval(provider().sst_sent(gtid, error));
Expand Down Expand Up @@ -719,7 +723,6 @@ void wsrep::server_state::sst_received(wsrep::client_service& cs,
assert(init_initialized_);
}
}
state(lock, s_joined);
lock.unlock();

if (id_.is_undefined())
Expand All @@ -731,6 +734,14 @@ void wsrep::server_state::sst_received(wsrep::client_service& cs,

gtid = server_service_.get_position(cs);
wsrep::log_info() << "Recovered position from storage: " << gtid;
if (gtid.seqno() >= connected_gtid().seqno())
{
/* Now the node has all the data the cluster has: part in
* storage, part in replication event queue. */
lock.lock();
state(lock, s_joined);
lock.unlock();
}
wsrep::view const v(server_service_.get_view(cs, id_));
wsrep::log_info() << "Recovered view from SST:\n" << v;

Expand Down Expand Up @@ -811,6 +822,21 @@ void wsrep::server_state::last_committed_gtid(const wsrep::gtid& gtid)
last_committed_gtid_.seqno() + 1 == gtid.seqno());
last_committed_gtid_ = gtid;
cond_.notify_all();

if (state(lock) < s_joined)
{
// wsrep API does not have a dedicated JOINED event, but we know
// that the node is joined when it commits GTID at which it connected
assert(state(lock) == s_initialized || state(lock) == s_joiner);
if (gtid.seqno() == connected_gtid().seqno())
{
state(lock, s_joined);
}
else
{
assert(gtid.seqno() < connected_gtid().seqno());
}
}
}

wsrep::gtid wsrep::server_state::last_committed_gtid() const
Expand Down Expand Up @@ -1084,13 +1110,14 @@ void wsrep::server_state::on_sync()
{
case s_synced:
break;
case s_connected:
state(lock, s_joiner);
// fall through
case s_joiner:
state(lock, s_initializing);
case s_connected: // Seed node path: provider becomes
state(lock, s_joiner); // synced with itself before anything
// fall through // else. Then goes DB initialization.
case s_joiner: // |
state(lock, s_initializing); // V
break;
case s_donor:
assert(false); // this should never happen
state(lock, s_joined);
state(lock, s_synced);
break;
Expand Down Expand Up @@ -1357,14 +1384,14 @@ void wsrep::server_state::state(
assert(lock.owns_lock());
static const char allowed[n_states_][n_states_] =
{
/* dis, ing, ized, cted, jer, jed, dor, sed, ding */
/* dis, ing, ized, cted, jer, jed, dor, sed, ding to/from */
{ 0, 1, 0, 1, 0, 0, 0, 0, 0}, /* dis */
{ 1, 0, 1, 0, 0, 0, 0, 0, 1}, /* ing */
{ 1, 0, 0, 1, 0, 1, 0, 0, 1}, /* ized */
{ 1, 0, 0, 1, 1, 0, 0, 1, 1}, /* cted */
{ 1, 1, 0, 0, 0, 1, 0, 0, 1}, /* jer */
{ 1, 0, 0, 1, 0, 0, 1, 1, 1}, /* jed */
{ 1, 0, 0, 1, 0, 1, 0, 1, 1}, /* dor */
{ 1, 0, 0, 1, 0, 1, 0, 0, 1}, /* dor */
{ 1, 0, 0, 1, 0, 1, 1, 0, 1}, /* sed */
{ 1, 0, 0, 0, 0, 0, 0, 0, 0} /* ding */
};
Expand Down

0 comments on commit 714a4bc

Please sign in to comment.