Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ZOOKEEPER-3023: Sync and commit diff log entries before NEWLEADER ack #1848

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,10 @@ public TxnHeader getHeader() {
/**
 * Returns the transaction digest held by this log entry.
 *
 * @return the {@code digest} field of this entry
 */
public TxnDigest getDigest() {
    return this.digest;
}

/**
 * Converts this log entry into a {@link Request}.
 *
 * <p>The client id, cxid, type and zxid passed to the request are all read
 * from this entry's header; the header itself and the txn record are passed
 * along as well, and the entry's digest is attached afterwards.
 *
 * @return a new request built from this entry's header, txn and digest
 */
public Request toRequest() {
    final Request converted = new Request(
        header.getClientId(),
        header.getCxid(),
        header.getType(),
        header,
        txn,
        header.getZxid());
    converted.setTxnDigest(digest);
    return converted;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.zookeeper.server.quorum;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.LinkedBlockingQueue;
import javax.management.JMException;
Expand All @@ -31,6 +32,7 @@
import org.apache.zookeeper.server.RequestProcessor;
import org.apache.zookeeper.server.ServerMetrics;
import org.apache.zookeeper.server.SyncRequestProcessor;
import org.apache.zookeeper.server.TxnLogEntry;
import org.apache.zookeeper.server.ZKDatabase;
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.apache.zookeeper.txn.TxnDigest;
Expand Down Expand Up @@ -88,6 +90,20 @@ public void logRequest(TxnHeader hdr, Record txn, TxnDigest digest) {
syncProcessor.processRequest(request);
}

/**
 * Appends the given log entries to the database, applies each of them via
 * {@code processTxn}, and finally commits the database once.
 *
 * <p>Only permitted while this server is in {@code State.INITIAL}.
 *
 * @param logEntries entries to append and apply, in iteration order
 * @throws IllegalStateException if the server state is not {@code State.INITIAL}
 * @throws IOException declared by this method; presumably raised by the
 *         database append/commit calls — confirm against ZKDatabase
 */
public void syncAndCommitInitialLogEntries(List<TxnLogEntry> logEntries) throws IOException {
    // Read the field once so the check and the error message see the same value.
    final State observed = this.state;
    if (observed != State.INITIAL) {
        throw new IllegalStateException(
            String.format("illegal state %s to sync initial log entries", observed));
    }

    for (TxnLogEntry entry : logEntries) {
        // Append the entry to the database first, then apply it.
        final Request asRequest = entry.toRequest();
        getZKDatabase().append(asRequest);
        processTxn(entry.getHeader(), entry.getTxn());
    }

    // A single commit covers every entry appended above.
    getZKDatabase().commit();
}

/**
* When a COMMIT message is received, eventually this method is called,
* which matches up the zxid from the COMMIT with (hopefully) the head of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ Optional<ServerSocket> createServerSocket(InetSocketAddress address, boolean por

/**
* This message type is sent by the leader to indicate that the follower is
* now uptodate andt can start responding to clients.
* now uptodate and can start responding to clients.
*/
static final int UPTODATE = 12;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
import java.net.Socket;
import java.nio.ByteBuffer;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
Expand Down Expand Up @@ -82,6 +84,9 @@ static class PacketInFlight {
Record rec;
TxnDigest digest;

/**
 * Packages this in-flight packet's record, header and digest into a
 * {@link TxnLogEntry}.
 *
 * @return a new log entry built from {@code rec}, {@code hdr} and {@code digest}
 */
TxnLogEntry toLogEntry() {
    final TxnLogEntry entry = new TxnLogEntry(rec, hdr, digest);
    return entry;
}
}

QuorumPeer self;
Expand Down Expand Up @@ -750,17 +755,37 @@ protected void syncWithLeader(long newLeaderZxid) throws Exception {
//Anything after this needs to go to the transaction log, not applied directly in memory
isPreZAB1_0 = false;

// ZOOKEEPER-3911: make sure to sync the uncommitted logs before committing them (ACK NEWLEADER).
if (zk instanceof FollowerZooKeeperServer && !packetsCommitted.isEmpty()) {
List<TxnLogEntry> entries = new ArrayList<>(packetsCommitted.size());
// Pop log entries from packetsNotCommitted according to packetsCommitted.
// In case of mismatch, log warning and keep packetsNotCommitted untouched.
while (!packetsCommitted.isEmpty()) {
long zxid = packetsCommitted.removeFirst();
pif = packetsNotCommitted.peekFirst();
if (pif == null) {
LOG.warn("Committing 0x{}, but got no proposal", Long.toHexString(zxid));
continue;
} else if (pif.hdr.getZxid() != zxid) {
LOG.warn(
"Committing 0x{}, but next proposal is 0x{}",
Long.toHexString(zxid),
Long.toHexString(pif.hdr.getZxid()));
continue;
}
packetsNotCommitted.removeFirst();
entries.add(pif.toLogEntry());
}
FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk;
fzk.syncAndCommitInitialLogEntries(entries);
}

// We almost complete the synchronization phase, all that's left is UPTODATE
// which is a client serving valve and interleaved with broadcast phase.
//
// We are ready for broadcast phase on our behalf now except serving client requests.
sock.setSoTimeout(self.tickTime * self.syncLimit);
self.setSyncMode(QuorumPeer.SyncMode.NONE);
zk.startupWithoutServing();
if (zk instanceof FollowerZooKeeperServer) {
FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk;
for (PacketInFlight p : packetsNotCommitted) {
fzk.logRequest(p.hdr, p.rec, p.digest);
}
packetsNotCommitted.clear();
}

writePacket(new QuorumPacket(Leader.ACK, newLeaderZxid, null, null), true);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -743,8 +743,8 @@ public void converseWithFollower(InputArchive ia, OutputArchive oa, Follower f)

readPacketSkippingPing(ia, qp);
assertEquals(Leader.ACKEPOCH, qp.getType());
assertEquals(0, qp.getZxid());
assertEquals(ZxidUtils.makeZxid(0, 0), ByteBuffer.wrap(qp.getData()).getInt());
assertEquals(ZxidUtils.makeZxid(0, 0), qp.getZxid());
assertEquals(0, ByteBuffer.wrap(qp.getData()).getInt());
assertEquals(1, f.self.getAcceptedEpoch());
assertEquals(0, f.self.getCurrentEpoch());

Expand Down Expand Up @@ -779,24 +779,11 @@ public void converseWithFollower(InputArchive ia, OutputArchive oa, Follower f)
assertEquals(1, f.self.getAcceptedEpoch());
assertEquals(1, f.self.getCurrentEpoch());

//Wait for the transactions to be written out. The thread that writes them out
// does not send anything back when it is done.
long start = System.currentTimeMillis();
while (createSessionZxid != f.fzk.getLastProcessedZxid()
&& (System.currentTimeMillis() - start) < 50) {
Thread.sleep(1);
}

assertEquals(createSessionZxid, f.fzk.getLastProcessedZxid());

// Make sure the data was recorded in the filesystem ok
ZKDatabase zkDb2 = new ZKDatabase(new FileTxnSnapLog(logDir, snapDir));
start = System.currentTimeMillis();
zkDb2.loadDataBase();
while (zkDb2.getSessionWithTimeOuts().isEmpty() && (System.currentTimeMillis() - start) < 50) {
Thread.sleep(1);
zkDb2.loadDataBase();
}
LOG.info("zkdb2 sessions:{}", zkDb2.getSessions());
LOG.info("zkdb2 with timeouts:{}", zkDb2.getSessionWithTimeOuts());
assertNotNull(zkDb2.getSessionWithTimeOuts().get(4L));
Expand Down