From 427e9a74df535d512bd8c4a3b4cb2afa2aab20e5 Mon Sep 17 00:00:00 2001 From: Andrey Yegorov Date: Tue, 4 Jun 2024 15:51:08 -0700 Subject: [PATCH 1/3] another attempt --- .../apache/bookkeeper/replication/BookieAutoRecoveryTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java index ccb262ed268..37ae6312206 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java @@ -117,6 +117,9 @@ public void setUp() throws Exception { mFactory = metadataClientDriver.getLedgerManagerFactory(); underReplicationManager = mFactory.newLedgerUnderreplicationManager(); ledgerManager = mFactory.newLedgerManager(); + + // ensure Auditor runs and updates known bookies + getAuditor(10, TimeUnit.SECONDS).submitAuditTask().get(); } @Override From 3ad0897cca58d03a65e42338f3283a93bff71c31 Mon Sep 17 00:00:00 2001 From: Andrey Yegorov Date: Tue, 4 Jun 2024 17:30:20 -0700 Subject: [PATCH 2/3] Another approach: make sure Auditor starts with all bookies, not just the running ones --- .../java/org/apache/bookkeeper/replication/Auditor.java | 9 ++++++++- .../bookkeeper/replication/BookieAutoRecoveryTest.java | 3 --- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java index 997baf33384..9a74d3ecac8 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java @@ -36,6 +36,8 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import 
java.util.function.BiConsumer; +import java.util.stream.Collectors; + import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeperAdmin; @@ -386,7 +388,12 @@ public void start() { try { watchBookieChanges(); - knownBookies = getAvailableBookies(); + // Start with all bookies, not just the running ones, + // to handle situations where the auditor + // is started after some bookies have already failed + knownBookies = admin.getAllBookies().stream() + .map(BookieId::toString) + .collect(Collectors.toList()); this.ledgerUnderreplicationManager .notifyLostBookieRecoveryDelayChanged(new LostBookieRecoveryDelayChangedCb()); } catch (BKException bke) { diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java index 37ae6312206..ccb262ed268 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java @@ -117,9 +117,6 @@ public void setUp() throws Exception { mFactory = metadataClientDriver.getLedgerManagerFactory(); underReplicationManager = mFactory.newLedgerUnderreplicationManager(); ledgerManager = mFactory.newLedgerManager(); - - // ensure Auditor runs and updates known bookies - getAuditor(10, TimeUnit.SECONDS).submitAuditTask().get(); } @Override From e29e4e5578ed4a5ae4e2b3a720fa731b0904e1e9 Mon Sep 17 00:00:00 2001 From: Andrey Yegorov Date: Tue, 4 Jun 2024 17:39:03 -0700 Subject: [PATCH 3/3] checkstyle --- .../src/main/java/org/apache/bookkeeper/replication/Auditor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java index 9a74d3ecac8..9c6be197550 100644 --- 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java @@ -37,7 +37,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.BiConsumer; import java.util.stream.Collectors; - import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.BookKeeperAdmin;