Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ZOOKEEPER-4646: Committed txns may still be lost if followers crash after replying ACK of NEWLEADER but before writing txns to disk #1993

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.concurrent.LinkedBlockingQueue;
import javax.management.JMException;
import org.apache.jute.Record;
import org.apache.zookeeper.common.Time;
import org.apache.zookeeper.jmx.MBeanRegistry;
import org.apache.zookeeper.metrics.MetricsContext;
import org.apache.zookeeper.server.ExitCode;
Expand Down Expand Up @@ -88,6 +89,18 @@ public void logRequest(TxnHeader hdr, Record txn, TxnDigest digest) {
syncProcessor.processRequest(request);
}

/**
 * Wraps the given transaction in a {@link Request} and appends it to the ZK database
 * by calling {@code getZKDatabase().append(...)} directly, rather than handing the
 * request to {@code syncProcessor} the way {@code logRequest} does.
 *
 * <p>The request is also added to {@code pendingTxns} when the low 32 bits of its zxid
 * are non-zero (presumably the per-epoch counter part of the zxid; confirm against the
 * zxid layout used elsewhere in the project).
 *
 * <p>The elapsed time of the append call is recorded in the {@code SYNC_PROCESS_TIME} metric.
 * This method only appends; it does not itself call {@code commit()} on the database,
 * so the caller is expected to do that if it needs the txn persisted (the visible caller
 * in the learner sync path does so after its loop).
 *
 * @param hdr    header of the transaction; supplies client id, cxid, type and zxid
 * @param txn    the transaction record
 * @param digest digest attached to the request via {@code setTxnDigest}
 * @return the request that was built and appended
 * @throws IOException declared by this method; presumably raised by the append call
 */
public Request logRequestBeforeAckNewleader(TxnHeader hdr, Record txn, TxnDigest digest) throws IOException {
    final Request pending = new Request(
            hdr.getClientId(),
            hdr.getCxid(),
            hdr.getType(),
            hdr,
            txn,
            hdr.getZxid());
    pending.setTxnDigest(digest);

    // Only requests whose zxid has non-zero low 32 bits are tracked as pending.
    final boolean lowBitsSet = (pending.zxid & 0xffffffffL) != 0;
    if (lowBitsSet) {
        pendingTxns.add(pending);
    }

    // Time just the append so the metric reflects the cost of this call alone.
    final long appendStartedAt = Time.currentElapsedTime();
    getZKDatabase().append(pending);
    final long appendElapsed = Time.currentElapsedTime() - appendStartedAt;
    ServerMetrics.getMetrics().SYNC_PROCESS_TIME.add(appendElapsed);

    return pending;
}

/**
* When a COMMIT message is received, eventually this method is called,
* which matches up the zxid from the COMMIT with (hopefully) the head of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,7 @@ protected void syncWithLeader(long newLeaderZxid) throws Exception {
readPacket(qp);
AlphaCanisMajoris marked this conversation as resolved.
Show resolved Hide resolved
Deque<Long> packetsCommitted = new ArrayDeque<>();
Deque<PacketInFlight> packetsNotCommitted = new ArrayDeque<>();
Deque<Request> requestsToBeReplied = new ArrayDeque<>();
synchronized (zk) {
if (qp.getType() == Leader.DIFF) {
LOG.info("Getting a diff from the leader 0x{}", Long.toHexString(qp.getZxid()));
Expand Down Expand Up @@ -750,16 +751,18 @@ protected void syncWithLeader(long newLeaderZxid) throws Exception {
//Anything after this needs to go to the transaction log, not applied directly in memory
isPreZAB1_0 = false;

// ZOOKEEPER-3911: make sure to sync the uncommitted logs before committing them (ACK NEWLEADER).
// ZOOKEEPER-3911 & 4646: make sure to sync the uncommitted logs before committing them (ACK NEWLEADER).
sock.setSoTimeout(self.tickTime * self.syncLimit);
self.setSyncMode(QuorumPeer.SyncMode.NONE);
zk.startupWithoutServing();
if (zk instanceof FollowerZooKeeperServer) {
FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk;
for (PacketInFlight p : packetsNotCommitted) {
fzk.logRequest(p.hdr, p.rec, p.digest);
requestsToBeReplied.add(fzk.logRequestBeforeAckNewleader(p.hdr, p.rec, p.digest));
}
packetsNotCommitted.clear();
// persist the transaction logs
fzk.getZKDatabase().commit();
}

writePacket(new QuorumPacket(Leader.ACK, newLeaderZxid, null, null), true);
Expand All @@ -781,6 +784,16 @@ protected void syncWithLeader(long newLeaderZxid) throws Exception {

// We need to log the stuff that came in between the snapshot and the uptodate
if (zk instanceof FollowerZooKeeperServer) {
// Reply queued ACKs that are generated before replying ACK of NEWLEADER
// ZOOKEEPER-4685: make sure to reply ACK of PROPOSAL after replying ACK of NEWLEADER.
for (Request si : requestsToBeReplied) {
QuorumPacket p = new QuorumPacket(Leader.ACK, si.getHdr().getZxid(), null, null);
si.logLatency(ServerMetrics.getMetrics().PROPOSAL_ACK_CREATION_LATENCY);
writePacket(p, false);
}
requestsToBeReplied.clear();
writePacket(null, true);

FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk;
for (PacketInFlight p : packetsNotCommitted) {
fzk.logRequest(p.hdr, p.rec, p.digest);
Expand Down