Skip to content

Commit

Permalink
ZOOKEEPER-2623: Fix database corruption caused by quorum check (#1988)
Browse files Browse the repository at this point in the history
  • Loading branch information
kezhuw authored Sep 10, 2023
1 parent e0890d0 commit b31f776
Show file tree
Hide file tree
Showing 10 changed files with 136 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import org.apache.zookeeper.data.Id;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.proto.AddWatchRequest;
import org.apache.zookeeper.proto.CheckVersionRequest;
import org.apache.zookeeper.proto.CheckWatchesRequest;
import org.apache.zookeeper.proto.Create2Response;
import org.apache.zookeeper.proto.CreateResponse;
Expand Down Expand Up @@ -354,8 +355,10 @@ public void processRequest(Request request) {
}
case OpCode.check: {
lastOp = "CHEC";
rsp = new SetDataResponse(rc.stat);
err = Code.get(rc.err);
CheckVersionRequest checkVersionRequest = request.readRequestRecord(CheckVersionRequest::new);
path = checkVersionRequest.getPath();
handleCheckVersionRequest(checkVersionRequest, cnxn, request.authInfo);
requestPathMetricsCollector.registerRequest(request.type, path);
break;
}
case OpCode.exists: {
Expand Down Expand Up @@ -643,6 +646,19 @@ private Record handleGetDataRequest(Record request, ServerCnxn cnxn, List<Id> au
return new GetDataResponse(b, stat);
}

/**
 * Serves a standalone version-check request against the local database.
 *
 * <p>The node named by the request must exist, the caller must pass the
 * {@code READ} ACL check on it, and, unless the requested version is
 * {@code -1}, the node's current data version must equal the requested one.
 * Nothing is returned; the outcome is reported solely through exceptions.
 *
 * @param request  the check request carrying the path and the expected version
 * @param cnxn     the connection the request arrived on, forwarded to the ACL check
 * @param authInfo the requester's identities, forwarded to the ACL check
 * @throws KeeperException {@code NoNodeException} when no node exists at the path,
 *         {@code BadVersionException} when the versions differ; the ACL check
 *         presumably raises its own {@code KeeperException} on denial
 */
private void handleCheckVersionRequest(CheckVersionRequest request, ServerCnxn cnxn, List<Id> authInfo) throws KeeperException {
    final String nodePath = request.getPath();
    final DataNode node = zks.getZKDatabase().getNode(nodePath);
    if (node == null) {
        throw new KeeperException.NoNodeException();
    }

    // Permission is verified before the version so an unauthorized caller
    // learns nothing about the node's current version.
    zks.checkACL(cnxn, zks.getZKDatabase().aclForNode(node), ZooDefs.Perms.READ, authInfo, nodePath, null);

    final int expectedVersion = request.getVersion();
    // -1 is the "match any version" sentinel: skip the comparison entirely.
    final boolean anyVersionAccepted = expectedVersion == -1;
    if (!anyVersionAccepted && node.stat.getVersion() != expectedVersion) {
        throw new KeeperException.BadVersionException(nodePath);
    }
}

private boolean closeSession(ServerCnxnFactory serverCnxnFactory, long sessionId) {
if (serverCnxnFactory == null) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -798,10 +798,6 @@ private void pRequestHelper(Request request) {
SetACLRequest setAclRequest = request.readRequestRecord(SetACLRequest::new);
pRequest2Txn(request.type, zks.getNextZxid(), request, setAclRequest);
break;
case OpCode.check:
CheckVersionRequest checkRequest = request.readRequestRecord(CheckVersionRequest::new);
pRequest2Txn(request.type, zks.getNextZxid(), request, checkRequest);
break;
case OpCode.multi:
MultiOperationRecord multiRequest;
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,6 @@ public boolean isQuorum() {
case OpCode.deleteContainer:
case OpCode.setACL:
case OpCode.setData:
case OpCode.check:
case OpCode.multi:
case OpCode.reconfig:
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ protected boolean needCommit(Request request) {
case OpCode.reconfig:
case OpCode.multi:
case OpCode.setACL:
case OpCode.check:
return true;
case OpCode.sync:
return matchSyncs;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ public void run() {
case OpCode.reconfig:
case OpCode.setACL:
case OpCode.multi:
case OpCode.check:
zks.getFollower().request(request);
break;
case OpCode.createSession:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ public void run() {
case OpCode.reconfig:
case OpCode.setACL:
case OpCode.multi:
case OpCode.check:
zks.getObserver().request(request);
break;
case OpCode.createSession:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ public void run() {
case OpCode.reconfig:
case OpCode.setACL:
case OpCode.multi:
case OpCode.check:
sendErrorResponse(request);
continue;
case OpCode.closeSession:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ static boolean isWriteOp(int requestType) {
case ZooDefs.OpCode.reconfig:
case ZooDefs.OpCode.setACL:
case ZooDefs.OpCode.multi:
case ZooDefs.OpCode.check:
return true;
}
return false;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.zookeeper.test;

import static org.junit.jupiter.api.Assertions.assertThrows;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.TestableZooKeeper;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.proto.CheckVersionRequest;
import org.apache.zookeeper.proto.ReplyHeader;
import org.apache.zookeeper.proto.RequestHeader;
import org.apache.zookeeper.server.quorum.QuorumPeer;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;

/**
 * Sends {@code OpCode.check} as a standalone request (outside of a multi) and
 * verifies the reply codes, both against a single server and against a quorum.
 * Each scenario finishes by restarting a server and reconnecting to it.
 */
public class CheckTest extends ClientBase {

    /** Tests whose display name contains this marker manage their own servers. */
    private static final String CLUSTER_MARKER = "Cluster";

    /** Path of the node every check in this class is issued against. */
    private static final String ROOT = "/";

    /** Tells whether the running test brings up its own quorum instead of the base server. */
    private static boolean isClusterTest(TestInfo testInfo) {
        return testInfo.getDisplayName().contains(CLUSTER_MARKER);
    }

    /** Starts the base-class server, except for tests that run their own quorum. */
    @BeforeEach
    public void setUp(TestInfo testInfo) throws Exception {
        if (!isClusterTest(testInfo)) {
            super.setUp();
        }
    }

    /** Stops the base-class server, except for tests that run their own quorum. */
    @AfterEach
    public void tearDown(TestInfo testInfo) throws Exception {
        if (!isClusterTest(testInfo)) {
            super.tearDown();
        }
    }

    /**
     * Intentionally empty: start-up is decided per test in {@link #setUp(TestInfo)}.
     * NOTE(review): presumably this neutralizes a lifecycle hook inherited from
     * the base class - confirm against ClientBase.
     */
    @Override
    public void setUp() throws Exception {
    }

    /**
     * Intentionally empty: shutdown is decided per test in {@link #tearDown(TestInfo)}.
     * NOTE(review): presumably this neutralizes a lifecycle hook inherited from
     * the base class - confirm against ClientBase.
     */
    @Override
    public void tearDown() throws Exception {
    }

    /**
     * Submits a raw check request for {@code path} at {@code version} and
     * converts a non-zero reply code into the matching {@link KeeperException}.
     */
    private static void checkVersion(TestableZooKeeper zk, String path, int version) throws Exception {
        RequestHeader requestHeader = new RequestHeader();
        requestHeader.setType(ZooDefs.OpCode.check);
        ReplyHeader reply = zk.submitRequest(requestHeader, new CheckVersionRequest(path, version), null, null);
        int errorCode = reply.getErr();
        if (errorCode == 0) {
            return;
        }
        throw KeeperException.create(KeeperException.Code.get(errorCode), path);
    }

    /**
     * Runs the four check scenarios through {@code zk}: the -1 version, the
     * exact current version, a mismatching version and a missing node.
     */
    private void testOperations(TestableZooKeeper zk) throws Exception {
        Stat rootStat = new Stat();
        zk.getData(ROOT, false, rootStat);

        // Both of these are expected to succeed without throwing.
        checkVersion(zk, ROOT, -1);
        checkVersion(zk, ROOT, rootStat.getVersion());

        assertThrows(
            KeeperException.BadVersionException.class,
            () -> checkVersion(zk, ROOT, rootStat.getVersion() + 1));
        assertThrows(
            KeeperException.NoNodeException.class,
            () -> checkVersion(zk, "/no-node", Integer.MAX_VALUE));
    }

    /** Checks against the single base-class server, then restarts it and reconnects. */
    @Test
    public void testStandalone() throws Exception {
        testOperations(createClient());

        // A fresh client must still be able to connect after a full restart.
        stopServer();
        startServer();
        createClient();
    }

    /**
     * Checks through an observer, a follower and the leader of a quorum, then
     * restarts the former leader and reconnects to its client port.
     */
    @Test
    public void testCluster() throws Exception {
        QuorumBase quorum = new QuorumBase();
        try {
            quorum.setUp(true, true);

            QuorumPeer.ServerState[] roles = {
                QuorumPeer.ServerState.OBSERVING,
                QuorumPeer.ServerState.FOLLOWING,
                QuorumPeer.ServerState.LEADING,
            };
            for (QuorumPeer.ServerState role : roles) {
                testOperations(quorum.createClient(new CountdownWatcher(), role));
            }

            // Capture the leader's coordinates before it is shut down.
            int leaderIndex = quorum.getLeaderIndex();
            int leaderPort = quorum.getLeaderClientPort();
            quorum.shutdown(quorum.getLeaderQuorumPeer());

            // NOTE(review): setupServer is given leaderIndex + 1, presumably a
            // 1-based server id - confirm against QuorumBase.
            quorum.setupServer(leaderIndex + 1);
            QuorumPeer restartedLeader = quorum.getPeerList().get(leaderIndex);
            restartedLeader.start();

            // The restarted peer must accept client connections again.
            quorum.createClient("localhost:" + leaderPort, 2 * CONNECTION_TIMEOUT);
        } finally {
            try {
                quorum.tearDown();
            } catch (Exception ignored) {
                // Best-effort cleanup: a teardown failure must not mask the test outcome.
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ protected TestableZooKeeper createClient(CountdownWatcher watcher) throws IOExce
private List<ZooKeeper> allClients;
private boolean allClientsSetup = false;

/**
 * Creates a client for {@code hp} with the given timeout, supplying a newly
 * created {@link CountdownWatcher} to the three-argument overload.
 *
 * @param hp      host/port string handed to the three-argument overload
 * @param timeout timeout value handed to the three-argument overload
 * @return the client produced by the three-argument overload
 */
protected TestableZooKeeper createClient(String hp, int timeout) throws IOException, InterruptedException {
    CountdownWatcher freshWatcher = new CountdownWatcher();
    return createClient(freshWatcher, hp, timeout);
}

/**
 * Creates a client for {@code hp} with the given watcher, using
 * {@code CONNECTION_TIMEOUT} as the timeout for the three-argument overload.
 *
 * @param watcher watcher handed to the three-argument overload
 * @param hp      host/port string handed to the three-argument overload
 * @return the client produced by the three-argument overload
 */
protected TestableZooKeeper createClient(CountdownWatcher watcher, String hp) throws IOException, InterruptedException {
    final int defaultTimeout = CONNECTION_TIMEOUT;
    return createClient(watcher, hp, defaultTimeout);
}
Expand Down

0 comments on commit b31f776

Please sign in to comment.