From ac850882888976d71b557103256f0fe94a478ac6 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 16 Jun 2022 17:38:05 -0700 Subject: [PATCH 01/45] ZkStateReader changes to avoid race condition for collectionWatches and watchedCollectionStates --- .../solr/common/cloud/ZkStateReader.java | 163 +++++++++--------- 1 file changed, 77 insertions(+), 86 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index a2f9864525b..091a78df019 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -171,10 +171,6 @@ public class ZkStateReader implements SolrCloseable { public static final String SHARD_LEADERS_ZKNODE = "leaders"; public static final String ELECTION_NODE = "election"; - /** "Interesting" and actively watched Collections. */ - private final ConcurrentHashMap watchedCollectionStates = - new ConcurrentHashMap<>(); - /** "Interesting" but not actively watched Collections. */ private final ConcurrentHashMap lazyCollectionStates = new ConcurrentHashMap<>(); @@ -195,8 +191,11 @@ public class ZkStateReader implements SolrCloseable { private final Runnable securityNodeListener; - private ConcurrentHashMap> collectionWatches = - new ConcurrentHashMap<>(); + /** + * Collections with active watches. The {@link DocCollectionWatch} inside for each collection might also contain the + * latest DocCollection (state) observed + */ + private DocCollectionWatches collectionWatches = new DocCollectionWatches(); // named this observers so there's less confusion between CollectionPropsWatcher map and the // PropsWatcher map. @@ -246,6 +245,59 @@ public boolean canBeRemoved() { } } + /** + * A ConcurrentHashMap of active watcher by collection name + * + * Each watcher DocCollectionWatch also contains the latest DocCollection (state) observed + */ + private static class DocCollectionWatches extends ConcurrentHashMap> { + /** + * Gets the DocCollection (state) of the collection which the corresponding watch last observed + * @param collection + * @return + */ + private DocCollection getDocCollection(String collection) { + DocCollectionWatch watch = get(collection); + return watch != null ? watch.currentDoc : null; + } + + /** + * Updates the latest observed DocCollection (state) of the {@link DocCollectionWatch} if the collection is being watched + * @param collection + * @param newState the new DocCollection (state) observed + * @return whether an active watch exists for such collection + */ + private boolean updateDocCollection(String collection, DocCollection newState) { + DocCollectionWatch watch = get(collection); + if (watch != null) { + DocCollection oldState = watch.currentDoc; + if (oldState == null && newState == null) { + //OK, the collection not yet exist in ZK + } else if (oldState == null) { + log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); + watch.currentDoc = newState; + } else if (newState == null) { + log.debug("Removing cached collection state for [{}]", collection); + watch.currentDoc = null; + } else { //both new and old states are non-null + int oldCVersion = oldState.getPerReplicaStates() == null ? -1 : oldState.getPerReplicaStates().cversion; + int newCVersion = newState.getPerReplicaStates() == null ? -1 : newState.getPerReplicaStates().cversion; + if (oldState.getZNodeVersion() < newState.getZNodeVersion() || oldCVersion < newCVersion) { + watch.currentDoc = newState; + log.debug("Updating data for [{}] from [{}] to [{}]", collection, oldState.getZNodeVersion(), newState.getZNodeVersion()); + } + } + return true; + } else { + return false; + } + } + } + + private static class DocCollectionWatch extends CollectionWatch { + private DocCollection currentDoc; + } + public static final Set KNOWN_CLUSTER_PROPS = Set.of( URL_SCHEME, @@ -327,11 +379,11 @@ public void forciblyRefreshAllClusterStateSlow() throws KeeperException, Interru refreshCollectionList(null); refreshLiveNodes(null); // Need a copy so we don't delete from what we're iterating over. - Collection safeCopy = new ArrayList<>(watchedCollectionStates.keySet()); + Collection safeCopy = new ArrayList<>(collectionWatches.keySet()); Set updatedCollections = new HashSet<>(); for (String coll : safeCopy) { DocCollection newState = fetchCollectionState(coll, null); - if (updateWatchedCollection(coll, newState)) { + if (collectionWatches.updateDocCollection(coll, newState)) { updatedCollections.add(coll); } } @@ -370,11 +422,11 @@ public void forceUpdateCollection(String collection) if (ref.get() != null) { return; } - } else if (watchedCollectionStates.containsKey(collection)) { + } else if (collectionWatches.containsKey(collection)) { // Exists as a watched collection, force a refresh. log.debug("Forcing refresh of watched collection state for {}", collection); DocCollection newState = fetchCollectionState(collection, null); - if (updateWatchedCollection(collection, newState)) { + if (collectionWatches.updateDocCollection(collection, newState)) { constructState(Collections.singleton(collection)); } } else { @@ -398,7 +450,7 @@ public Integer compareStateVersions(String coll, int version) { DocCollection nu = getCollectionLive(coll); if (nu == null) return -1; if (nu.getZNodeVersion() > collection.getZNodeVersion()) { - if (updateWatchedCollection(coll, nu)) { + if (collectionWatches.updateDocCollection(coll, nu)) { synchronized (getUpdateLock()) { constructState(Collections.singleton(coll)); } @@ -525,8 +577,10 @@ private void constructState(Set changedCollections) { Map result = new LinkedHashMap<>(); // Add collections - for (Map.Entry entry : watchedCollectionStates.entrySet()) { - result.put(entry.getKey(), new ClusterState.CollectionRef(entry.getValue())); + for (Map.Entry> entry : collectionWatches.entrySet()) { + if (entry.getValue().currentDoc != null) { //if the doc is null for the collection watch, then it should not be inserted into the state + result.putIfAbsent(entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentDoc)); + } } // Finally, add any lazy collections that aren't already accounted for. @@ -538,18 +592,16 @@ private void constructState(Set changedCollections) { if (log.isDebugEnabled()) { log.debug( - "clusterStateSet: interesting [{}] watched [{}] lazy [{}] total [{}]", + "clusterStateSet: watched [{}] lazy [{}] total [{}]", collectionWatches.keySet().size(), - watchedCollectionStates.keySet().size(), lazyCollectionStates.keySet().size(), clusterState.getCollectionStates().size()); } if (log.isTraceEnabled()) { log.trace( - "clusterStateSet: interesting [{}] watched [{}] lazy [{}] total [{}]", + "clusterStateSet: interesting [{}] lazy [{}] total [{}]", collectionWatches.keySet(), - watchedCollectionStates.keySet(), lazyCollectionStates.keySet(), clusterState.getCollectionStates()); } @@ -644,7 +696,7 @@ private void notifyCloudCollectionsListeners(boolean notifyIfSame) { private Set getCurrentCollections() { Set collections = new HashSet<>(); - collections.addAll(watchedCollectionStates.keySet()); + collections.addAll(collectionWatches.keySet()); collections.addAll(lazyCollectionStates.keySet()); return collections; } @@ -1294,7 +1346,7 @@ public void refreshAndWatch() { /** * Refresh collection state from ZK and leave a watch for future changes. As a side effect, - * updates {@link #clusterState} and {@link #watchedCollectionStates} with the results of the + * updates {@link #clusterState} and collection ref within {@link #collectionWatches} with the results of the * refresh. */ public void refreshAndWatch(EventType eventType) { @@ -1308,7 +1360,7 @@ public void refreshAndWatch(EventType eventType) { } DocCollection newState = fetchCollectionState(coll, this); - updateWatchedCollection(coll, newState); + collectionWatches.updateDocCollection(coll, newState); synchronized (getUpdateLock()) { constructState(Collections.singleton(coll)); } @@ -1332,10 +1384,10 @@ private void refreshAndWatchChildren() throws KeeperException, InterruptedExcept replicaStates = zkClient.getChildren(collectionPath, this, stat, true); PerReplicaStates newStates = new PerReplicaStates(collectionPath, stat.getCversion(), replicaStates); - DocCollection oldState = watchedCollectionStates.get(coll); + DocCollection oldState = collectionWatches.getDocCollection(coll); final DocCollection newState = oldState != null ? oldState.copyWith(newStates) : fetchCollectionState(coll, null); - updateWatchedCollection(coll, newState); + collectionWatches.updateDocCollection(coll, newState); synchronized (getUpdateLock()) { constructState(Collections.singleton(coll)); } @@ -1612,7 +1664,7 @@ public void registerCore(String collection) { (k, v) -> { if (v == null) { reconstructState.set(true); - v = new CollectionWatch<>(); + v = new DocCollectionWatch<>(); } v.coreRefCount++; return v; @@ -1640,7 +1692,6 @@ public void unregisterCore(String collection) { if (v == null) return null; if (v.coreRefCount > 0) v.coreRefCount--; if (v.canBeRemoved()) { - watchedCollectionStates.remove(collection); lazyCollectionStates.put(collection, new LazyCollectionRef(collection)); reconstructState.set(true); return null; @@ -1697,7 +1748,7 @@ public void registerDocCollectionWatcher(String collection, DocCollectionWatcher collection, (k, v) -> { if (v == null) { - v = new CollectionWatch<>(); + v = new DocCollectionWatch<>(); watchSet.set(true); } v.stateWatchers.add(stateWatcher); @@ -1724,7 +1775,7 @@ private DocCollection updatePerReplicaState(DocCollection c) { log.debug("update for a fresh per-replica-state {}", c.getName()); } DocCollection modifiedColl = c.copyWith(newPrs); - updateWatchedCollection(c.getName(), modifiedColl); + collectionWatches.updateDocCollection(c.getName(), modifiedColl); return modifiedColl; } else { return c; @@ -1919,7 +1970,6 @@ public void removeDocCollectionWatcher(String collection, DocCollectionWatcher w if (v == null) return null; v.stateWatchers.remove(watcher); if (v.canBeRemoved()) { - watchedCollectionStates.remove(collection); lazyCollectionStates.put(collection, new LazyCollectionRef(collection)); reconstructState.set(true); return null; @@ -1947,65 +1997,6 @@ Set getStateWatchers(String collection) { return watchers; } - // returns true if the state has changed - private boolean updateWatchedCollection(String coll, DocCollection newState) { - - if (newState == null) { - log.debug("Removing cached collection state for [{}]", coll); - watchedCollectionStates.remove(coll); - return true; - } - - boolean updated = false; - // CAS update loop - while (true) { - if (!collectionWatches.containsKey(coll)) { - break; - } - DocCollection oldState = watchedCollectionStates.get(coll); - if (oldState == null) { - if (watchedCollectionStates.putIfAbsent(coll, newState) == null) { - if (log.isDebugEnabled()) { - log.debug("Add data for [{}] ver [{}]", coll, newState.getZNodeVersion()); - } - updated = true; - break; - } - } else { - int oldCVersion = - oldState.getPerReplicaStates() == null ? -1 : oldState.getPerReplicaStates().cversion; - int newCVersion = - newState.getPerReplicaStates() == null ? -1 : newState.getPerReplicaStates().cversion; - if (oldState.getZNodeVersion() >= newState.getZNodeVersion() - && oldCVersion >= newCVersion) { - // no change to state, but we might have been triggered by the addition of a - // state watcher, so run notifications - updated = true; - break; - } - if (watchedCollectionStates.replace(coll, oldState, newState)) { - if (log.isDebugEnabled()) { - log.debug( - "Updating data for [{}] from [{}] to [{}]", - coll, - oldState.getZNodeVersion(), - newState.getZNodeVersion()); - } - updated = true; - break; - } - } - } - - // Resolve race with unregisterCore. - if (!collectionWatches.containsKey(coll)) { - watchedCollectionStates.remove(coll); - log.debug("Removing uninteresting collection [{}]", coll); - } - - return updated; - } - public void registerCollectionPropsWatcher( final String collection, CollectionPropsWatcher propsWatcher) { AtomicBoolean watchSet = new AtomicBoolean(false); From 98813b26a3770918a07a23ceb1aa5c7a5415b964 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 16 Jun 2022 18:09:51 -0700 Subject: [PATCH 02/45] Fixed missing javadoc values --- .../java/org/apache/solr/common/cloud/ZkStateReader.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 091a78df019..b798843a2b0 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -253,8 +253,8 @@ public boolean canBeRemoved() { private static class DocCollectionWatches extends ConcurrentHashMap> { /** * Gets the DocCollection (state) of the collection which the corresponding watch last observed - * @param collection - * @return + * @param collection the collection name to get DocCollection on + * @return The last observed DocCollection(state). if null, that means there's no such collection. */ private DocCollection getDocCollection(String collection) { DocCollectionWatch watch = get(collection); @@ -263,7 +263,7 @@ private DocCollection getDocCollection(String collection) { /** * Updates the latest observed DocCollection (state) of the {@link DocCollectionWatch} if the collection is being watched - * @param collection + * @param collection the collection name * @param newState the new DocCollection (state) observed * @return whether an active watch exists for such collection */ From a14a53af4d63aa583a383b602dd5fc5ef2554897 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 16 Jun 2022 18:14:31 -0700 Subject: [PATCH 03/45] /gradlew :solr:solrj:spotlessApply --- .../solr/common/cloud/ZkStateReader.java | 54 ++++++++++++------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index b798843a2b0..7a757af4407 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -192,8 +192,8 @@ public class ZkStateReader implements SolrCloseable { private final Runnable securityNodeListener; /** - * Collections with active watches. The {@link DocCollectionWatch} inside for each collection might also contain the - * latest DocCollection (state) observed + * Collections with active watches. The {@link DocCollectionWatch} inside for each collection + * might also contain the latest DocCollection (state) observed */ private DocCollectionWatches collectionWatches = new DocCollectionWatches(); @@ -248,13 +248,16 @@ public boolean canBeRemoved() { /** * A ConcurrentHashMap of active watcher by collection name * - * Each watcher DocCollectionWatch also contains the latest DocCollection (state) observed + *

Each watcher DocCollectionWatch also contains the latest DocCollection (state) observed */ - private static class DocCollectionWatches extends ConcurrentHashMap> { + private static class DocCollectionWatches + extends ConcurrentHashMap> { /** * Gets the DocCollection (state) of the collection which the corresponding watch last observed - * @param collection the collection name to get DocCollection on - * @return The last observed DocCollection(state). if null, that means there's no such collection. + * + * @param collection the collection name to get DocCollection on + * @return The last observed DocCollection(state). if null, that means there's no such + * collection. */ private DocCollection getDocCollection(String collection) { DocCollectionWatch watch = get(collection); @@ -262,9 +265,11 @@ private DocCollection getDocCollection(String collection) { } /** - * Updates the latest observed DocCollection (state) of the {@link DocCollectionWatch} if the collection is being watched + * Updates the latest observed DocCollection (state) of the {@link DocCollectionWatch} if the + * collection is being watched + * * @param collection the collection name - * @param newState the new DocCollection (state) observed + * @param newState the new DocCollection (state) observed * @return whether an active watch exists for such collection */ private boolean updateDocCollection(String collection, DocCollection newState) { @@ -272,19 +277,26 @@ private boolean updateDocCollection(String collection, DocCollection newState) { if (watch != null) { DocCollection oldState = watch.currentDoc; if (oldState == null && newState == null) { - //OK, the collection not yet exist in ZK + // OK, the collection not yet exist in ZK } else if (oldState == null) { log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); watch.currentDoc = newState; } else if (newState == null) { log.debug("Removing cached collection state for [{}]", collection); watch.currentDoc = null; - } else { //both new and old states are non-null - int oldCVersion = oldState.getPerReplicaStates() == null ? -1 : oldState.getPerReplicaStates().cversion; - int newCVersion = newState.getPerReplicaStates() == null ? -1 : newState.getPerReplicaStates().cversion; - if (oldState.getZNodeVersion() < newState.getZNodeVersion() || oldCVersion < newCVersion) { + } else { // both new and old states are non-null + int oldCVersion = + oldState.getPerReplicaStates() == null ? -1 : oldState.getPerReplicaStates().cversion; + int newCVersion = + newState.getPerReplicaStates() == null ? -1 : newState.getPerReplicaStates().cversion; + if (oldState.getZNodeVersion() < newState.getZNodeVersion() + || oldCVersion < newCVersion) { watch.currentDoc = newState; - log.debug("Updating data for [{}] from [{}] to [{}]", collection, oldState.getZNodeVersion(), newState.getZNodeVersion()); + log.debug( + "Updating data for [{}] from [{}] to [{}]", + collection, + oldState.getZNodeVersion(), + newState.getZNodeVersion()); } } return true; @@ -577,9 +589,13 @@ private void constructState(Set changedCollections) { Map result = new LinkedHashMap<>(); // Add collections - for (Map.Entry> entry : collectionWatches.entrySet()) { - if (entry.getValue().currentDoc != null) { //if the doc is null for the collection watch, then it should not be inserted into the state - result.putIfAbsent(entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentDoc)); + for (Map.Entry> entry : + collectionWatches.entrySet()) { + if (entry.getValue().currentDoc + != null) { // if the doc is null for the collection watch, then it should not be inserted + // into the state + result.putIfAbsent( + entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentDoc)); } } @@ -1346,8 +1362,8 @@ public void refreshAndWatch() { /** * Refresh collection state from ZK and leave a watch for future changes. As a side effect, - * updates {@link #clusterState} and collection ref within {@link #collectionWatches} with the results of the - * refresh. + * updates {@link #clusterState} and collection ref within {@link #collectionWatches} with the + * results of the refresh. */ public void refreshAndWatch(EventType eventType) { try { From 548417cb025a9016f533c71b9f636d2eeb4d57b6 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 16 Jun 2022 18:52:46 -0700 Subject: [PATCH 04/45] Addressed :solr:solrj:validateLogCalls violations --- .../apache/solr/common/cloud/ZkStateReader.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 7a757af4407..2348b30b746 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -279,7 +279,9 @@ private boolean updateDocCollection(String collection, DocCollection newState) { if (oldState == null && newState == null) { // OK, the collection not yet exist in ZK } else if (oldState == null) { - log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); + if (log.isDebugEnabled()) { + log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); + } watch.currentDoc = newState; } else if (newState == null) { log.debug("Removing cached collection state for [{}]", collection); @@ -292,11 +294,13 @@ private boolean updateDocCollection(String collection, DocCollection newState) { if (oldState.getZNodeVersion() < newState.getZNodeVersion() || oldCVersion < newCVersion) { watch.currentDoc = newState; - log.debug( - "Updating data for [{}] from [{}] to [{}]", - collection, - oldState.getZNodeVersion(), - newState.getZNodeVersion()); + if (log.isDebugEnabled()) { + log.debug( + "Updating data for [{}] from [{}] to [{}]", + collection, + oldState.getZNodeVersion(), + newState.getZNodeVersion()); + } } } return true; From 2189b3a36475d01abe6f7dcd67fda6fa7548ecdc Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 16 Jun 2022 18:55:23 -0700 Subject: [PATCH 05/45] /gradlew :solr:solrj:spotlessApply --- .../java/org/apache/solr/common/cloud/ZkStateReader.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 2348b30b746..6adccac59f1 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -296,10 +296,10 @@ private boolean updateDocCollection(String collection, DocCollection newState) { watch.currentDoc = newState; if (log.isDebugEnabled()) { log.debug( - "Updating data for [{}] from [{}] to [{}]", - collection, - oldState.getZNodeVersion(), - newState.getZNodeVersion()); + "Updating data for [{}] from [{}] to [{}]", + collection, + oldState.getZNodeVersion(), + newState.getZNodeVersion()); } } } From aa026d6189d2b1d0bec9befd0d2595fbadf7e75a Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 17 Jun 2022 14:46:15 -0700 Subject: [PATCH 06/45] Fixed DocCollectionWatches to keep a set of active collections to maintain consistent behavior with calls to `keySet` on the now refactored `watchedCollectionStates` --- .../solr/common/cloud/ZkStateReader.java | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 6adccac59f1..ca96cbaf98b 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -252,6 +252,8 @@ public boolean canBeRemoved() { */ private static class DocCollectionWatches extends ConcurrentHashMap> { + private static final Set activeCollections = ConcurrentHashMap.newKeySet(); + /** * Gets the DocCollection (state) of the collection which the corresponding watch last observed * @@ -264,6 +266,14 @@ private DocCollection getDocCollection(String collection) { return watch != null ? watch.currentDoc : null; } + /** + * Gets the active collections (collections that exist) being watched + * @return an immutable set of active collection names + */ + private Set activeCollections() { + return Collections.unmodifiableSet(activeCollections); + } + /** * Updates the latest observed DocCollection (state) of the {@link DocCollectionWatch} if the * collection is being watched @@ -283,9 +293,11 @@ private boolean updateDocCollection(String collection, DocCollection newState) { log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); } watch.currentDoc = newState; + activeCollections.add(collection); } else if (newState == null) { log.debug("Removing cached collection state for [{}]", collection); watch.currentDoc = null; + activeCollections.remove(collection); } else { // both new and old states are non-null int oldCVersion = oldState.getPerReplicaStates() == null ? -1 : oldState.getPerReplicaStates().cversion; @@ -394,10 +406,9 @@ public void forciblyRefreshAllClusterStateSlow() throws KeeperException, Interru // No need to set watchers because we should already have watchers registered for everything. refreshCollectionList(null); refreshLiveNodes(null); - // Need a copy so we don't delete from what we're iterating over. - Collection safeCopy = new ArrayList<>(collectionWatches.keySet()); + Set updatedCollections = new HashSet<>(); - for (String coll : safeCopy) { + for (String coll : collectionWatches.activeCollections()) { DocCollection newState = fetchCollectionState(coll, null); if (collectionWatches.updateDocCollection(coll, newState)) { updatedCollections.add(coll); @@ -612,16 +623,18 @@ private void constructState(Set changedCollections) { if (log.isDebugEnabled()) { log.debug( - "clusterStateSet: watched [{}] lazy [{}] total [{}]", + "clusterStateSet: interesting [{}] watched [{}] lazy [{}] total [{}]", collectionWatches.keySet().size(), + collectionWatches.activeCollections().size(), lazyCollectionStates.keySet().size(), clusterState.getCollectionStates().size()); } if (log.isTraceEnabled()) { log.trace( - "clusterStateSet: interesting [{}] lazy [{}] total [{}]", + "clusterStateSet: interesting [{}] watched [{}] lazy [{}] total [{}]", collectionWatches.keySet(), + collectionWatches.activeCollections().size(), lazyCollectionStates.keySet(), clusterState.getCollectionStates()); } @@ -716,7 +729,7 @@ private void notifyCloudCollectionsListeners(boolean notifyIfSame) { private Set getCurrentCollections() { Set collections = new HashSet<>(); - collections.addAll(collectionWatches.keySet()); + collections.addAll(collectionWatches.activeCollections()); collections.addAll(lazyCollectionStates.keySet()); return collections; } From 07611a1c645d280cf623cad8e5659f59d403374b Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 17 Jun 2022 14:47:53 -0700 Subject: [PATCH 07/45] Code cleanup --- .../src/java/org/apache/solr/common/cloud/ZkStateReader.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index ca96cbaf98b..b584c2264ae 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -23,7 +23,6 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.EnumSet; import java.util.HashSet; @@ -268,7 +267,8 @@ private DocCollection getDocCollection(String collection) { /** * Gets the active collections (collections that exist) being watched - * @return an immutable set of active collection names + * + * @return an immutable set of active collection names */ private Set activeCollections() { return Collections.unmodifiableSet(activeCollections); From 4a6d6a2ae2fd5424e42831bc7436aa07de55d733 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 17 Jun 2022 15:04:31 -0700 Subject: [PATCH 08/45] forciblyRefreshAllClusterStateSlow should use key set in collectionWatches instead of watched collection states, as newly created collection might not yet be reflected in watched collection states and should be refreshed. --- .../src/java/org/apache/solr/common/cloud/ZkStateReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index b584c2264ae..bef759539f5 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -408,7 +408,7 @@ public void forciblyRefreshAllClusterStateSlow() throws KeeperException, Interru refreshLiveNodes(null); Set updatedCollections = new HashSet<>(); - for (String coll : collectionWatches.activeCollections()) { + for (String coll : collectionWatches.keySet()) { DocCollection newState = fetchCollectionState(coll, null); if (collectionWatches.updateDocCollection(coll, newState)) { updatedCollections.add(coll); From 7a8f9606cac7ffa1c2b7561d89b1f778f5476890 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 17 Jun 2022 15:44:53 -0700 Subject: [PATCH 09/45] Fixed DocCollectionWatches to keep a set of active collections to maintain consistent behavior with calls to `keySet` on the now refactored `watchedCollectionStates` --- .../java/org/apache/solr/common/cloud/ZkStateReader.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index bef759539f5..263822de6ee 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -251,7 +251,6 @@ public boolean canBeRemoved() { */ private static class DocCollectionWatches extends ConcurrentHashMap> { - private static final Set activeCollections = ConcurrentHashMap.newKeySet(); /** * Gets the DocCollection (state) of the collection which the corresponding watch last observed @@ -271,7 +270,9 @@ private DocCollection getDocCollection(String collection) { * @return an immutable set of active collection names */ private Set activeCollections() { - return Collections.unmodifiableSet(activeCollections); + return this.entrySet().stream() + .filter((Entry> entry) -> entry.getValue().currentDoc != null) + .map(Entry::getKey).collect(Collectors.toUnmodifiableSet()); } /** @@ -293,11 +294,9 @@ private boolean updateDocCollection(String collection, DocCollection newState) { log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); } watch.currentDoc = newState; - activeCollections.add(collection); } else if (newState == null) { log.debug("Removing cached collection state for [{}]", collection); watch.currentDoc = null; - activeCollections.remove(collection); } else { // both new and old states are non-null int oldCVersion = oldState.getPerReplicaStates() == null ? -1 : oldState.getPerReplicaStates().cversion; From eaa02113cb5cfeabe1f16b1cdabe10995c8037e1 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 17 Jun 2022 15:46:45 -0700 Subject: [PATCH 10/45] /gradlew :solr:solrj:spotlessApply --- .../java/org/apache/solr/common/cloud/ZkStateReader.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 263822de6ee..61f3bed82df 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -271,8 +271,11 @@ private DocCollection getDocCollection(String collection) { */ private Set activeCollections() { return this.entrySet().stream() - .filter((Entry> entry) -> entry.getValue().currentDoc != null) - .map(Entry::getKey).collect(Collectors.toUnmodifiableSet()); + .filter( + (Entry> entry) -> + entry.getValue().currentDoc != null) + .map(Entry::getKey) + .collect(Collectors.toUnmodifiableSet()); } /** From 027436bac01a4a41074e8d3db3cbbe2683d0965e Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 17 Jun 2022 18:46:06 -0700 Subject: [PATCH 11/45] Added unit test cases --- .../cloud/overseer/ZkStateReaderTest.java | 234 +++++++++++++++++- .../solr/common/cloud/ZkStateReader.java | 2 +- 2 files changed, 227 insertions(+), 9 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 639c7538a46..174d2ab3374 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -17,21 +17,21 @@ package org.apache.solr.cloud.overseer; import java.nio.file.Path; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + import org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.cloud.OverseerTest; import org.apache.solr.cloud.Stats; import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkTestServer; -import org.apache.solr.common.cloud.ClusterState; -import org.apache.solr.common.cloud.DocCollection; -import org.apache.solr.common.cloud.DocRouter; -import org.apache.solr.common.cloud.SolrZkClient; -import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.cloud.*; import org.apache.solr.common.util.TimeSource; import org.apache.solr.handler.admin.ConfigSetsHandler; import org.apache.solr.util.TimeOut; @@ -196,4 +196,222 @@ public void testWatchedCollectionCreation() throws Exception { server.shutdown(); } } + + public void testForciblyRefreshAllClusterState() throws Exception { + Path zkDir = createTempDir("testForciblyRefreshAllClusterState"); + + ZkTestServer server = new ZkTestServer(zkDir); + + SolrZkClient zkClient = null; + ZkStateReader reader = null; + + try { + server.run(); + + zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); + ZkController.createClusterZkNodes(zkClient); + + reader = new ZkStateReader(zkClient); + reader.createClusterStateWatchersAndUpdate(); + reader.registerCore("c1"); + zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + + reader.forciblyRefreshAllClusterStateSlow(); + // Initially there should be no c1 collection. + assertNull(reader.getClusterState().getCollectionRef("c1")); + + ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); + + // create new collection + DocCollection state = + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + ZkWriteCommand wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); + writer.writePendingUpdates(); + + assertTrue(zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + + reader.forciblyRefreshAllClusterStateSlow(); + ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); + assertNotNull(ref); + assertFalse(ref.isLazilyLoaded()); + assertEquals(0, ref.get().getZNodeVersion()); + + //update the collection + state = new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + ref.get().getZNodeVersion()); + wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); + writer.writePendingUpdates(); + + reader.forciblyRefreshAllClusterStateSlow(); + ref = reader.getClusterState().getCollectionRef("c1"); + assertNotNull(ref); + assertFalse(ref.isLazilyLoaded()); + assertEquals(1, ref.get().getZNodeVersion()); + + //delete the collection c1, add a collection c2 that is NOT watched + ZkWriteCommand wc1 = new ZkWriteCommand("c1", null); + + zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); + state = new DocCollection( + "c2", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + ZkWriteCommand wc2 = new ZkWriteCommand("c2", state); + + writer.enqueueUpdate(reader.getClusterState(), Arrays.asList(wc1, wc2), null); + writer.writePendingUpdates(); + + reader.forciblyRefreshAllClusterStateSlow(); + ref = reader.getClusterState().getCollectionRef("c1"); + assertNull(ref); + + ref = reader.getClusterState().getCollectionRef("c2"); + assertNotNull(ref); + assertFalse(ref.isLazilyLoaded()); + assertEquals(0, ref.get().getZNodeVersion()); + } finally { + IOUtils.close(reader, zkClient); + server.shutdown(); + } + } + + public void testGetCurrentCollections() throws Exception { + Path zkDir = createTempDir("testGetCurrentCollections"); + + ZkTestServer server = new ZkTestServer(zkDir); + + SolrZkClient zkClient = null; + ZkStateReader reader = null; + + try { + server.run(); + + zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); + ZkController.createClusterZkNodes(zkClient); + + reader = new ZkStateReader(zkClient); + reader.createClusterStateWatchersAndUpdate(); + reader.registerCore("c1"); //listen to c1. not yet exist + zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + reader.forceUpdateCollection("c1"); + Set currentCollections = reader.getCurrentCollections(); + assertEquals(0, currentCollections.size()); //no active collections yet + + //now create both c1 (watched) and c2 (not watched) + DocCollection state1 = new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + ZkWriteCommand wc1 = new ZkWriteCommand("c1", state1); + DocCollection state2 = new DocCollection( + "c2", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + + //do not listen to c2 + zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); + ZkWriteCommand wc2 = new ZkWriteCommand("c2", state2); + + ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); + writer.enqueueUpdate(reader.getClusterState(), Arrays.asList(wc1, wc2), null); + writer.writePendingUpdates(); + + reader.forceUpdateCollection("c1"); + reader.forceUpdateCollection("c2"); + currentCollections = reader.getCurrentCollections(); //should detect both collections (c1 watched, c2 lazy loaded) + assertEquals(2, currentCollections.size()); + } finally { + IOUtils.close(reader, zkClient); + server.shutdown(); + } + } + + public void testWatchRaceCondition() throws Exception { + final int RUN_COUNT = 1000; + Path zkDir = createTempDir("testConcurrencyWatch"); + + ZkTestServer server = new ZkTestServer(zkDir); + + SolrZkClient zkClient = null; + ZkStateReader reader = null; + ExecutorService executorService = Executors.newSingleThreadExecutor(); + + try { + server.run(); + + zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); + ZkController.createClusterZkNodes(zkClient); + + reader = new ZkStateReader(zkClient); + final ZkStateReader readerRef = reader; + reader.createClusterStateWatchersAndUpdate(); + zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + + //start another thread to constantly updating the state + final AtomicBoolean stopMutatingThread = new AtomicBoolean(false); + final ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); + final AtomicInteger updateCounts = new AtomicInteger(0); + final AtomicReference updateException = new AtomicReference<>(); + executorService.submit(() -> { + try { + ClusterState clusterState = readerRef.getClusterState(); + while (!stopMutatingThread.get()) { + DocCollection collection = clusterState.getCollectionOrNull("c1"); + int currentVersion = collection != null? collection.getZNodeVersion() : 0; + System.out.println("current version " + currentVersion); + // create new collection + DocCollection state = + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + currentVersion); + ZkWriteCommand wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); + clusterState = writer.writePendingUpdates(); + } + } catch (Exception e) { + updateException.set(e); + } + return null; + }); + executorService.shutdown(); + + DocCollectionWatcher dummyWatcher = collection -> false; //do not remove itself + for (int i = 0; i < RUN_COUNT; i++) { + reader.registerDocCollectionWatcher("c1", dummyWatcher); + TimeUnit.MILLISECONDS.sleep(10); + reader.removeDocCollectionWatcher("c1", dummyWatcher); + assert(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); //it should always be lazy loaded, as the collection is not watched anymore + } + + stopMutatingThread.set(true); + if (updateException.get() != null) { + throw(updateException.get()); + } + + } finally { + IOUtils.close(reader, zkClient); + executorService.awaitTermination(10, TimeUnit.SECONDS); + server.shutdown(); + } + } } diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 61f3bed82df..f9d6590f1c0 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -729,7 +729,7 @@ private void notifyCloudCollectionsListeners(boolean notifyIfSame) { } } - private Set getCurrentCollections() { + public Set getCurrentCollections() { Set collections = new HashSet<>(); collections.addAll(collectionWatches.activeCollections()); collections.addAll(lazyCollectionStates.keySet()); From d94a38b703636b537896b9e46823f6099c1d36c7 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 17 Jun 2022 18:48:23 -0700 Subject: [PATCH 12/45] ./gradlew :solr:solrj:spotlessApply --- .../cloud/overseer/ZkStateReaderTest.java | 122 ++++++++++-------- 1 file changed, 67 insertions(+), 55 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 174d2ab3374..616d5da053b 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -24,7 +24,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; - import org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.cloud.OverseerTest; @@ -224,12 +223,12 @@ public void testForciblyRefreshAllClusterState() throws Exception { // create new collection DocCollection state = - new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0); + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); ZkWriteCommand wc = new ZkWriteCommand("c1", state); writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); writer.writePendingUpdates(); @@ -242,12 +241,13 @@ public void testForciblyRefreshAllClusterState() throws Exception { assertFalse(ref.isLazilyLoaded()); assertEquals(0, ref.get().getZNodeVersion()); - //update the collection - state = new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, + // update the collection + state = + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, ref.get().getZNodeVersion()); wc = new ZkWriteCommand("c1", state); writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); @@ -259,16 +259,17 @@ public void testForciblyRefreshAllClusterState() throws Exception { assertFalse(ref.isLazilyLoaded()); assertEquals(1, ref.get().getZNodeVersion()); - //delete the collection c1, add a collection c2 that is NOT watched + // delete the collection c1, add a collection c2 that is NOT watched ZkWriteCommand wc1 = new ZkWriteCommand("c1", null); zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); - state = new DocCollection( - "c2", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0); + state = + new DocCollection( + "c2", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); ZkWriteCommand wc2 = new ZkWriteCommand("c2", state); writer.enqueueUpdate(reader.getClusterState(), Arrays.asList(wc1, wc2), null); @@ -304,28 +305,30 @@ public void testGetCurrentCollections() throws Exception { reader = new ZkStateReader(zkClient); reader.createClusterStateWatchersAndUpdate(); - reader.registerCore("c1"); //listen to c1. not yet exist + reader.registerCore("c1"); // listen to c1. not yet exist zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); reader.forceUpdateCollection("c1"); Set currentCollections = reader.getCurrentCollections(); - assertEquals(0, currentCollections.size()); //no active collections yet - - //now create both c1 (watched) and c2 (not watched) - DocCollection state1 = new DocCollection( + assertEquals(0, currentCollections.size()); // no active collections yet + + // now create both c1 (watched) and c2 (not watched) + DocCollection state1 = + new DocCollection( "c1", new HashMap<>(), Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), DocRouter.DEFAULT, 0); ZkWriteCommand wc1 = new ZkWriteCommand("c1", state1); - DocCollection state2 = new DocCollection( + DocCollection state2 = + new DocCollection( "c2", new HashMap<>(), Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), DocRouter.DEFAULT, 0); - //do not listen to c2 + // do not listen to c2 zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); ZkWriteCommand wc2 = new ZkWriteCommand("c2", state2); @@ -335,7 +338,9 @@ public void testGetCurrentCollections() throws Exception { reader.forceUpdateCollection("c1"); reader.forceUpdateCollection("c2"); - currentCollections = reader.getCurrentCollections(); //should detect both collections (c1 watched, c2 lazy loaded) + currentCollections = + reader.getCurrentCollections(); // should detect both collections (c1 watched, c2 lazy + // loaded) assertEquals(2, currentCollections.size()); } finally { IOUtils.close(reader, zkClient); @@ -364,48 +369,55 @@ public void testWatchRaceCondition() throws Exception { reader.createClusterStateWatchersAndUpdate(); zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); - //start another thread to constantly updating the state + // start another thread to constantly updating the state final AtomicBoolean stopMutatingThread = new AtomicBoolean(false); final ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); final AtomicInteger updateCounts = new AtomicInteger(0); final AtomicReference updateException = new AtomicReference<>(); - executorService.submit(() -> { - try { - ClusterState clusterState = readerRef.getClusterState(); - while (!stopMutatingThread.get()) { - DocCollection collection = clusterState.getCollectionOrNull("c1"); - int currentVersion = collection != null? collection.getZNodeVersion() : 0; - System.out.println("current version " + currentVersion); - // create new collection - DocCollection state = + executorService.submit( + () -> { + try { + ClusterState clusterState = readerRef.getClusterState(); + while (!stopMutatingThread.get()) { + DocCollection collection = clusterState.getCollectionOrNull("c1"); + int currentVersion = collection != null ? collection.getZNodeVersion() : 0; + System.out.println("current version " + currentVersion); + // create new collection + DocCollection state = new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - currentVersion); - ZkWriteCommand wc = new ZkWriteCommand("c1", state); - writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); - clusterState = writer.writePendingUpdates(); - } - } catch (Exception e) { - updateException.set(e); - } - return null; - }); + "c1", + new HashMap<>(), + Map.of( + ZkStateReader.CONFIGNAME_PROP, + ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + currentVersion); + ZkWriteCommand wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); + clusterState = writer.writePendingUpdates(); + } + } catch (Exception e) { + updateException.set(e); + } + return null; + }); executorService.shutdown(); - DocCollectionWatcher dummyWatcher = collection -> false; //do not remove itself + DocCollectionWatcher dummyWatcher = collection -> false; // do not remove itself for (int i = 0; i < RUN_COUNT; i++) { reader.registerDocCollectionWatcher("c1", dummyWatcher); TimeUnit.MILLISECONDS.sleep(10); reader.removeDocCollectionWatcher("c1", dummyWatcher); - assert(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); //it should always be lazy loaded, as the collection is not watched anymore + assert (reader + .getClusterState() + .getCollectionRef("c1") + .isLazilyLoaded()); // it should always be lazy loaded, as the collection is not watched + // anymore } stopMutatingThread.set(true); if (updateException.get() != null) { - throw(updateException.get()); + throw (updateException.get()); } } finally { From 8f87fd84a07507cd40ffa4a9baccebe8a786beb3 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 17 Jun 2022 20:13:41 -0700 Subject: [PATCH 13/45] Fixed check error Forbidden method invocation: java.util.concurrent.Executors#newSingleThreadExecutor() [Spawns threads with vague names; use a custom thread factory (Lucene's NamedThreadFactory, Solr's SolrNamedThreadFactory) and name threads so that you can tell (by its name) which executor it is associated with] in org.apache.solr.cloud.overseer.ZkStateReaderTest (ZkStateReaderTest.java:359) --- .../test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 616d5da053b..0918f5ca865 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -31,6 +31,7 @@ import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkTestServer; import org.apache.solr.common.cloud.*; +import org.apache.solr.common.util.SolrNamedThreadFactory; import org.apache.solr.common.util.TimeSource; import org.apache.solr.handler.admin.ConfigSetsHandler; import org.apache.solr.util.TimeOut; @@ -356,7 +357,7 @@ public void testWatchRaceCondition() throws Exception { SolrZkClient zkClient = null; ZkStateReader reader = null; - ExecutorService executorService = Executors.newSingleThreadExecutor(); + ExecutorService executorService = Executors.newSingleThreadExecutor(new SolrNamedThreadFactory("zkStateReaderTest")); try { server.run(); From 174b5c4bbb7c20255edb5e423e7c3b6c24daf6b6 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Sat, 18 Jun 2022 11:09:29 -0700 Subject: [PATCH 14/45] Fixed gradle check Forbidden method invocation: java.util.concurrent.Executors#newSingleThreadExecutor(java.util.concurrent.ThreadFactory) [Spawns threads without MDC logging context; use ExecutorUtil.newMDCAwareSingleThreadExecutor instead] --- .../test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 0918f5ca865..3a7f8fba3e4 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -31,6 +31,7 @@ import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkTestServer; import org.apache.solr.common.cloud.*; +import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.SolrNamedThreadFactory; import org.apache.solr.common.util.TimeSource; import org.apache.solr.handler.admin.ConfigSetsHandler; @@ -357,7 +358,7 @@ public void testWatchRaceCondition() throws Exception { SolrZkClient zkClient = null; ZkStateReader reader = null; - ExecutorService executorService = Executors.newSingleThreadExecutor(new SolrNamedThreadFactory("zkStateReaderTest")); + ExecutorService executorService = ExecutorUtil.newMDCAwareSingleThreadExecutor(new SolrNamedThreadFactory("zkStateReaderTest")); try { server.run(); From 8c32e86668182874f5f9bbb482b25146f2b1d3e1 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Sat, 18 Jun 2022 11:36:45 -0700 Subject: [PATCH 15/45] Code cleanup --- .../test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 3a7f8fba3e4..60c8b53ba86 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -19,7 +19,6 @@ import java.nio.file.Path; import java.util.*; import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; From 1bfedb7876769d4c407372ad43c105bef1182e65 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Sat, 18 Jun 2022 11:38:58 -0700 Subject: [PATCH 16/45] ./gradlew :solr:solrj:spotlessApply --- .../org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 60c8b53ba86..010dcdb9d44 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -357,7 +357,9 @@ public void testWatchRaceCondition() throws Exception { SolrZkClient zkClient = null; ZkStateReader reader = null; - ExecutorService executorService = ExecutorUtil.newMDCAwareSingleThreadExecutor(new SolrNamedThreadFactory("zkStateReaderTest")); + ExecutorService executorService = + ExecutorUtil.newMDCAwareSingleThreadExecutor( + new SolrNamedThreadFactory("zkStateReaderTest")); try { server.run(); From 44243f6b2f7c9ea81d998c5682a2859cedc0fa99 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Mon, 20 Jun 2022 20:28:10 -0700 Subject: [PATCH 17/45] Reproduce race condition more consistently in ZkStateReaderTest --- .../cloud/overseer/ZkStateReaderTest.java | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 010dcdb9d44..af82af24ae6 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -16,8 +16,10 @@ */ package org.apache.solr.cloud.overseer; +import java.lang.invoke.MethodHandles; import java.nio.file.Path; import java.util.*; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -35,9 +37,11 @@ import org.apache.solr.common.util.TimeSource; import org.apache.solr.handler.admin.ConfigSetsHandler; import org.apache.solr.util.TimeOut; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class ZkStateReaderTest extends SolrTestCaseJ4 { - + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final long TIMEOUT = 30; public void testExternalCollectionWatchedNotWatched() throws Exception { @@ -350,8 +354,8 @@ public void testGetCurrentCollections() throws Exception { } public void testWatchRaceCondition() throws Exception { - final int RUN_COUNT = 1000; - Path zkDir = createTempDir("testConcurrencyWatch"); + final int RUN_COUNT = 10000; + Path zkDir = createTempDir("testWatchRaceCondition"); ZkTestServer server = new ZkTestServer(zkDir); @@ -384,7 +388,6 @@ public void testWatchRaceCondition() throws Exception { while (!stopMutatingThread.get()) { DocCollection collection = clusterState.getCollectionOrNull("c1"); int currentVersion = collection != null ? collection.getZNodeVersion() : 0; - System.out.println("current version " + currentVersion); // create new collection DocCollection state = new DocCollection( @@ -406,16 +409,31 @@ public void testWatchRaceCondition() throws Exception { }); executorService.shutdown(); - DocCollectionWatcher dummyWatcher = collection -> false; // do not remove itself for (int i = 0; i < RUN_COUNT; i++) { + final CountDownLatch latch = new CountDownLatch(2); + + // remove itself on 2nd trigger + DocCollectionWatcher dummyWatcher = collection -> { + latch.countDown(); + return latch.getCount() == 0; + }; reader.registerDocCollectionWatcher("c1", dummyWatcher); - TimeUnit.MILLISECONDS.sleep(10); + latch.await(10, TimeUnit.SECONDS); reader.removeDocCollectionWatcher("c1", dummyWatcher); - assert (reader - .getClusterState() - .getCollectionRef("c1") - .isLazilyLoaded()); // it should always be lazy loaded, as the collection is not watched - // anymore + + boolean refLazilyLoaded = false; + for (int j = 0 ; j < 10; j++) { + if (reader + .getClusterState() + .getCollectionRef("c1") + .isLazilyLoaded()) { + refLazilyLoaded = true; // it should eventually be lazily loaded + break; + } + log.info("ref is not lazily loaded yet. Attempt : {}", (j + 1)); + TimeUnit.MILLISECONDS.sleep(100); + } + assert(refLazilyLoaded); } stopMutatingThread.set(true); From 22aa937b4bc94d88eb8d545cf2754963ea70bb46 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Mon, 20 Jun 2022 20:37:21 -0700 Subject: [PATCH 18/45] ./gradlew :solr:solrj:spotlessApply --- .../solr/cloud/overseer/ZkStateReaderTest.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index af82af24ae6..9807a65f30f 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -413,27 +413,25 @@ public void testWatchRaceCondition() throws Exception { final CountDownLatch latch = new CountDownLatch(2); // remove itself on 2nd trigger - DocCollectionWatcher dummyWatcher = collection -> { - latch.countDown(); - return latch.getCount() == 0; - }; + DocCollectionWatcher dummyWatcher = + collection -> { + latch.countDown(); + return latch.getCount() == 0; + }; reader.registerDocCollectionWatcher("c1", dummyWatcher); latch.await(10, TimeUnit.SECONDS); reader.removeDocCollectionWatcher("c1", dummyWatcher); boolean refLazilyLoaded = false; - for (int j = 0 ; j < 10; j++) { - if (reader - .getClusterState() - .getCollectionRef("c1") - .isLazilyLoaded()) { + for (int j = 0; j < 10; j++) { + if (reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()) { refLazilyLoaded = true; // it should eventually be lazily loaded break; } log.info("ref is not lazily loaded yet. Attempt : {}", (j + 1)); TimeUnit.MILLISECONDS.sleep(100); } - assert(refLazilyLoaded); + assert (refLazilyLoaded); } stopMutatingThread.set(true); From c74c86be773526468748554a7102365630914e68 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Mon, 20 Jun 2022 20:41:39 -0700 Subject: [PATCH 19/45] Code cleanup --- .../org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 9807a65f30f..25eee7ceac4 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -428,7 +428,9 @@ public void testWatchRaceCondition() throws Exception { refLazilyLoaded = true; // it should eventually be lazily loaded break; } - log.info("ref is not lazily loaded yet. Attempt : {}", (j + 1)); + if (log.isInfoEnabled()) { + log.info("ref is not lazily loaded yet. Attempt : {}", (j + 1)); + } TimeUnit.MILLISECONDS.sleep(100); } assert (refLazilyLoaded); From 1861dba1bf7f4a78eb08ded74107e03c62c973e0 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Mon, 20 Jun 2022 20:43:01 -0700 Subject: [PATCH 20/45] Code cleanup --- .../org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 25eee7ceac4..f9590089f8a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -428,9 +428,9 @@ public void testWatchRaceCondition() throws Exception { refLazilyLoaded = true; // it should eventually be lazily loaded break; } - if (log.isInfoEnabled()) { - log.info("ref is not lazily loaded yet. Attempt : {}", (j + 1)); - } + int attempt = j + 1; + log.info("ref is not lazily loaded yet. Attempt : {}", attempt); + TimeUnit.MILLISECONDS.sleep(100); } assert (refLazilyLoaded); From 479227d9dc22d5e4397f8e550c1b660fb18b288a Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Tue, 21 Jun 2022 09:54:12 -0700 Subject: [PATCH 21/45] Fixed unit test cases --- .../org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index f9590089f8a..1aff3896960 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -217,7 +217,7 @@ public void testForciblyRefreshAllClusterState() throws Exception { reader = new ZkStateReader(zkClient); reader.createClusterStateWatchersAndUpdate(); - reader.registerCore("c1"); + reader.registerCore("c1"); // watching c1, so it should get non lazy reference zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); reader.forciblyRefreshAllClusterStateSlow(); @@ -286,7 +286,7 @@ public void testForciblyRefreshAllClusterState() throws Exception { ref = reader.getClusterState().getCollectionRef("c2"); assertNotNull(ref); - assertFalse(ref.isLazilyLoaded()); + assert (ref.isLazilyLoaded()); // c2 should be lazily loaded as it's not watched assertEquals(0, ref.get().getZNodeVersion()); } finally { IOUtils.close(reader, zkClient); From d7b85e4695f10bbac8aca9e766a3748d0b426f65 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Tue, 21 Jun 2022 16:05:01 -0700 Subject: [PATCH 22/45] Do not use * for imports Fixed formatting --- .../solr/cloud/overseer/ZkStateReaderTest.java | 13 +++++++++++-- .../org/apache/solr/common/cloud/ZkStateReader.java | 5 ++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 1aff3896960..1a321e483c1 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -18,7 +18,11 @@ import java.lang.invoke.MethodHandles; import java.nio.file.Path; -import java.util.*; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; @@ -31,7 +35,12 @@ import org.apache.solr.cloud.Stats; import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkTestServer; -import org.apache.solr.common.cloud.*; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.DocCollectionWatcher; +import org.apache.solr.common.cloud.DocRouter; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.SolrNamedThreadFactory; import org.apache.solr.common.util.TimeSource; diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index f9d6590f1c0..1142cd891a1 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -608,9 +608,8 @@ private void constructState(Set changedCollections) { // Add collections for (Map.Entry> entry : collectionWatches.entrySet()) { - if (entry.getValue().currentDoc - != null) { // if the doc is null for the collection watch, then it should not be inserted - // into the state + if (entry.getValue().currentDoc != null) { + // if the doc is null for the collection watch, then it should not be inserted into the state result.putIfAbsent( entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentDoc)); } From fbc91ff669d1a6ac2509d6741de8e360cebedab0 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Tue, 21 Jun 2022 17:20:55 -0700 Subject: [PATCH 23/45] ./gradlew :solr:solrj:spotlessApply --- .../src/java/org/apache/solr/common/cloud/ZkStateReader.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 1142cd891a1..b4759143cbb 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -609,7 +609,8 @@ private void constructState(Set changedCollections) { for (Map.Entry> entry : collectionWatches.entrySet()) { if (entry.getValue().currentDoc != null) { - // if the doc is null for the collection watch, then it should not be inserted into the state + // if the doc is null for the collection watch, then it should not be inserted into the + // state result.putIfAbsent( entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentDoc)); } From d8ceb10cddbe838d9ed347a282c42810d3c37160 Mon Sep 17 00:00:00 2001 From: patsonluk Date: Thu, 30 Jun 2022 15:48:41 -0700 Subject: [PATCH 24/45] Update solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java Co-authored-by: Houston Putman --- .../java/org/apache/solr/common/cloud/ZkStateReader.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index b4759143cbb..eaf2209b59d 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -287,7 +287,7 @@ private Set activeCollections() { * @return whether an active watch exists for such collection */ private boolean updateDocCollection(String collection, DocCollection newState) { - DocCollectionWatch watch = get(collection); + DocCollectionWatch finalWatch = computeIfPresent(collection, (col, watch) -> { if (watch != null) { DocCollection oldState = watch.currentDoc; if (oldState == null && newState == null) { @@ -317,11 +317,10 @@ private boolean updateDocCollection(String collection, DocCollection newState) { } } } - return true; - } else { - return false; } - } + return watch; + }); + return finalWatch != null; } private static class DocCollectionWatch extends CollectionWatch { From 33c99f49a3ed5fab5a6439cf2182c47a4431cd7c Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 30 Jun 2022 16:33:17 -0700 Subject: [PATCH 25/45] Code cleanup/renaming after code review --- .../solr/common/cloud/ZkStateReader.java | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index eaf2209b59d..a253f12490d 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -191,7 +191,7 @@ public class ZkStateReader implements SolrCloseable { private final Runnable securityNodeListener; /** - * Collections with active watches. The {@link DocCollectionWatch} inside for each collection + * Collections with active watches. The {@link StatefulCollectionWatch} inside for each collection * might also contain the latest DocCollection (state) observed */ private DocCollectionWatches collectionWatches = new DocCollectionWatches(); @@ -250,7 +250,7 @@ public boolean canBeRemoved() { *

Each watcher DocCollectionWatch also contains the latest DocCollection (state) observed */ private static class DocCollectionWatches - extends ConcurrentHashMap> { + extends ConcurrentHashMap> { /** * Gets the DocCollection (state) of the collection which the corresponding watch last observed @@ -260,8 +260,8 @@ private static class DocCollectionWatches * collection. */ private DocCollection getDocCollection(String collection) { - DocCollectionWatch watch = get(collection); - return watch != null ? watch.currentDoc : null; + StatefulCollectionWatch watch = get(collection); + return watch != null ? watch.currentState : null; } /** @@ -272,14 +272,14 @@ private DocCollection getDocCollection(String collection) { private Set activeCollections() { return this.entrySet().stream() .filter( - (Entry> entry) -> - entry.getValue().currentDoc != null) + (Entry> entry) -> + entry.getValue().currentState != null) .map(Entry::getKey) .collect(Collectors.toUnmodifiableSet()); } /** - * Updates the latest observed DocCollection (state) of the {@link DocCollectionWatch} if the + * Updates the latest observed DocCollection (state) of the {@link StatefulCollectionWatch} if the * collection is being watched * * @param collection the collection name @@ -287,44 +287,43 @@ private Set activeCollections() { * @return whether an active watch exists for such collection */ private boolean updateDocCollection(String collection, DocCollection newState) { - DocCollectionWatch finalWatch = computeIfPresent(collection, (col, watch) -> { - if (watch != null) { - DocCollection oldState = watch.currentDoc; + StatefulCollectionWatch finalWatch = computeIfPresent(collection, (col, watch) -> { + DocCollection oldState = watch.currentState; if (oldState == null && newState == null) { // OK, the collection not yet exist in ZK } else if (oldState == null) { if (log.isDebugEnabled()) { log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); } - watch.currentDoc = newState; + watch.currentState = newState; } else if (newState == null) { log.debug("Removing cached collection state for [{}]", collection); - watch.currentDoc = null; + watch.currentState = null; } else { // both new and old states are non-null int oldCVersion = - oldState.getPerReplicaStates() == null ? -1 : oldState.getPerReplicaStates().cversion; + oldState.getPerReplicaStates() == null ? -1 : oldState.getPerReplicaStates().cversion; int newCVersion = - newState.getPerReplicaStates() == null ? -1 : newState.getPerReplicaStates().cversion; + newState.getPerReplicaStates() == null ? -1 : newState.getPerReplicaStates().cversion; if (oldState.getZNodeVersion() < newState.getZNodeVersion() - || oldCVersion < newCVersion) { - watch.currentDoc = newState; + || oldCVersion < newCVersion) { + watch.currentState = newState; if (log.isDebugEnabled()) { log.debug( - "Updating data for [{}] from [{}] to [{}]", - collection, - oldState.getZNodeVersion(), - newState.getZNodeVersion()); + "Updating data for [{}] from [{}] to [{}]", + collection, + oldState.getZNodeVersion(), + newState.getZNodeVersion()); } } } - } - return watch; - }); - return finalWatch != null; + return watch; + }); + return finalWatch != null; + } } - private static class DocCollectionWatch extends CollectionWatch { - private DocCollection currentDoc; + private static class StatefulCollectionWatch extends CollectionWatch { + private DocCollection currentState; } public static final Set KNOWN_CLUSTER_PROPS = @@ -605,13 +604,13 @@ private void constructState(Set changedCollections) { Map result = new LinkedHashMap<>(); // Add collections - for (Map.Entry> entry : + for (Map.Entry> entry : collectionWatches.entrySet()) { - if (entry.getValue().currentDoc != null) { + if (entry.getValue().currentState != null) { // if the doc is null for the collection watch, then it should not be inserted into the // state result.putIfAbsent( - entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentDoc)); + entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentState)); } } @@ -1698,7 +1697,7 @@ public void registerCore(String collection) { (k, v) -> { if (v == null) { reconstructState.set(true); - v = new DocCollectionWatch<>(); + v = new StatefulCollectionWatch<>(); } v.coreRefCount++; return v; @@ -1782,7 +1781,7 @@ public void registerDocCollectionWatcher(String collection, DocCollectionWatcher collection, (k, v) -> { if (v == null) { - v = new DocCollectionWatch<>(); + v = new StatefulCollectionWatch<>(); watchSet.set(true); } v.stateWatchers.add(stateWatcher); From cc314e62efbd637c2817f634514e07662c0e7aae Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 30 Jun 2022 18:31:07 -0700 Subject: [PATCH 26/45] Code changes after code review --- .../cloud/overseer/ZkStateReaderTest.java | 235 +++++++----------- .../solr/common/cloud/ZkStateReader.java | 73 +++--- 2 files changed, 128 insertions(+), 180 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 1a321e483c1..5d99bcbf1a8 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -16,19 +16,6 @@ */ package org.apache.solr.cloud.overseer; -import java.lang.invoke.MethodHandles; -import java.nio.file.Path; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; import org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.cloud.OverseerTest; @@ -49,28 +36,70 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.Closeable; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; + public class ZkStateReaderTest extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final long TIMEOUT = 30; - public void testExternalCollectionWatchedNotWatched() throws Exception { + private static class TestFixture implements Closeable { + private final ZkTestServer server; + private final SolrZkClient zkClient; + private final ZkStateReader reader; + private final ZkStateWriter writer; + + private TestFixture(ZkTestServer server, SolrZkClient zkClient, ZkStateReader reader, ZkStateWriter writer) { + this.server = server; + this.zkClient = zkClient; + this.reader = reader; + this.writer = writer; + } + + + @Override + public void close() throws IOException { + IOUtils.close(reader, zkClient); + try { + server.shutdown(); + } catch (InterruptedException e) { + //ok. Shutting down anyway + } + } + } + + private static TestFixture setupTestFixture(String testPrefix) throws Exception { Path zkDir = createTempDir("testExternalCollectionWatchedNotWatched"); ZkTestServer server = new ZkTestServer(zkDir); - SolrZkClient zkClient = null; - ZkStateReader reader = null; + server.run(); + SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); + ZkController.createClusterZkNodes(zkClient); - try { - server.run(); + ZkStateReader reader = new ZkStateReader(zkClient); + reader.createClusterStateWatchersAndUpdate(); - zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); - ZkController.createClusterZkNodes(zkClient); + ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); - reader = new ZkStateReader(zkClient); - reader.createClusterStateWatchersAndUpdate(); - - ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); + return new TestFixture(server, zkClient, reader, writer); + } - zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + public void testExternalCollectionWatchedNotWatched() throws Exception { + try (TestFixture fixture = setupTestFixture("testExternalCollectionWatchedNotWatched")){ + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); // create new collection ZkWriteCommand c1 = @@ -82,6 +111,7 @@ public void testExternalCollectionWatchedNotWatched() throws Exception { Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), DocRouter.DEFAULT, 0)); + writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(c1), null); writer.writePendingUpdates(); reader.forceUpdateCollection("c1"); @@ -91,33 +121,16 @@ public void testExternalCollectionWatchedNotWatched() throws Exception { assertFalse(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); reader.unregisterCore("c1"); assertTrue(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); - - } finally { - IOUtils.close(reader, zkClient); - server.shutdown(); } } public void testCollectionStateWatcherCaching() throws Exception { - Path zkDir = createTempDir("testCollectionStateWatcherCaching"); + try (TestFixture fixture = setupTestFixture("testCollectionStateWatcherCaching")){ + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; - ZkTestServer server = new ZkTestServer(zkDir); - - SolrZkClient zkClient = null; - ZkStateReader reader = null; - - try { - server.run(); - - zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); - ZkController.createClusterZkNodes(zkClient); - - reader = new ZkStateReader(zkClient); - reader.createClusterStateWatchersAndUpdate(); + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); - zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); - - ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); DocCollection state = new DocCollection( "c1", @@ -128,7 +141,7 @@ public void testCollectionStateWatcherCaching() throws Exception { ZkWriteCommand wc = new ZkWriteCommand("c1", state); writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); writer.writePendingUpdates(); - assertTrue(zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + assertTrue(fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); reader.waitForState( "c1", 1, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionState != null); @@ -150,41 +163,27 @@ public void testCollectionStateWatcherCaching() throws Exception { } } assertTrue("Could not find updated property in collection c1 even after 5 seconds", found); - } finally { - IOUtils.close(reader, zkClient); - server.shutdown(); } } - public void testWatchedCollectionCreation() throws Exception { - Path zkDir = createTempDir("testWatchedCollectionCreation"); - - ZkTestServer server = new ZkTestServer(zkDir); - - SolrZkClient zkClient = null; - ZkStateReader reader = null; - try { - server.run(); - zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); - ZkController.createClusterZkNodes(zkClient); + public void testWatchedCollectionCreation() throws Exception { + try (TestFixture fixture = setupTestFixture("testWatchedCollectionCreation")){ + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; - reader = new ZkStateReader(zkClient); - reader.createClusterStateWatchersAndUpdate(); reader.registerCore("c1"); // Initially there should be no c1 collection. assertNull(reader.getClusterState().getCollectionRef("c1")); - zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); reader.forceUpdateCollection("c1"); // Still no c1 collection, despite a collection path. assertNull(reader.getClusterState().getCollectionRef("c1")); - ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); - // create new collection DocCollection state = new DocCollection( @@ -197,44 +196,28 @@ public void testWatchedCollectionCreation() throws Exception { writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); writer.writePendingUpdates(); - assertTrue(zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + assertTrue(fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); // reader.forceUpdateCollection("c1"); reader.waitForState("c1", TIMEOUT, TimeUnit.SECONDS, (n, c) -> c != null); ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); assertNotNull(ref); assertFalse(ref.isLazilyLoaded()); - } finally { - IOUtils.close(reader, zkClient); - server.shutdown(); } } public void testForciblyRefreshAllClusterState() throws Exception { - Path zkDir = createTempDir("testForciblyRefreshAllClusterState"); - - ZkTestServer server = new ZkTestServer(zkDir); - - SolrZkClient zkClient = null; - ZkStateReader reader = null; - - try { - server.run(); + try (TestFixture fixture = setupTestFixture("testForciblyRefreshAllClusterState")){ + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; - zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); - ZkController.createClusterZkNodes(zkClient); - - reader = new ZkStateReader(zkClient); - reader.createClusterStateWatchersAndUpdate(); reader.registerCore("c1"); // watching c1, so it should get non lazy reference - zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); reader.forciblyRefreshAllClusterStateSlow(); // Initially there should be no c1 collection. assertNull(reader.getClusterState().getCollectionRef("c1")); - ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); - // create new collection DocCollection state = new DocCollection( @@ -247,7 +230,7 @@ public void testForciblyRefreshAllClusterState() throws Exception { writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); writer.writePendingUpdates(); - assertTrue(zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + assertTrue(fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); reader.forciblyRefreshAllClusterStateSlow(); ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); @@ -276,7 +259,7 @@ public void testForciblyRefreshAllClusterState() throws Exception { // delete the collection c1, add a collection c2 that is NOT watched ZkWriteCommand wc1 = new ZkWriteCommand("c1", null); - zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); state = new DocCollection( "c2", @@ -297,30 +280,16 @@ public void testForciblyRefreshAllClusterState() throws Exception { assertNotNull(ref); assert (ref.isLazilyLoaded()); // c2 should be lazily loaded as it's not watched assertEquals(0, ref.get().getZNodeVersion()); - } finally { - IOUtils.close(reader, zkClient); - server.shutdown(); } } public void testGetCurrentCollections() throws Exception { - Path zkDir = createTempDir("testGetCurrentCollections"); - - ZkTestServer server = new ZkTestServer(zkDir); - - SolrZkClient zkClient = null; - ZkStateReader reader = null; - - try { - server.run(); + try (TestFixture fixture = setupTestFixture("testGetCurrentCollections")){ + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; - zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); - ZkController.createClusterZkNodes(zkClient); - - reader = new ZkStateReader(zkClient); - reader.createClusterStateWatchersAndUpdate(); reader.registerCore("c1"); // listen to c1. not yet exist - zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); reader.forceUpdateCollection("c1"); Set currentCollections = reader.getCurrentCollections(); assertEquals(0, currentCollections.size()); // no active collections yet @@ -343,10 +312,9 @@ public void testGetCurrentCollections() throws Exception { 0); // do not listen to c2 - zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); ZkWriteCommand wc2 = new ZkWriteCommand("c2", state2); - ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); writer.enqueueUpdate(reader.getClusterState(), Arrays.asList(wc1, wc2), null); writer.writePendingUpdates(); @@ -356,44 +324,28 @@ public void testGetCurrentCollections() throws Exception { reader.getCurrentCollections(); // should detect both collections (c1 watched, c2 lazy // loaded) assertEquals(2, currentCollections.size()); - } finally { - IOUtils.close(reader, zkClient); - server.shutdown(); } } public void testWatchRaceCondition() throws Exception { final int RUN_COUNT = 10000; - Path zkDir = createTempDir("testWatchRaceCondition"); - - ZkTestServer server = new ZkTestServer(zkDir); - - SolrZkClient zkClient = null; - ZkStateReader reader = null; ExecutorService executorService = ExecutorUtil.newMDCAwareSingleThreadExecutor( new SolrNamedThreadFactory("zkStateReaderTest")); - try { - server.run(); - - zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); - ZkController.createClusterZkNodes(zkClient); - - reader = new ZkStateReader(zkClient); - final ZkStateReader readerRef = reader; - reader.createClusterStateWatchersAndUpdate(); - zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + try (TestFixture fixture = setupTestFixture("testWatchRaceCondition")){ + ZkStateWriter writer = fixture.writer; + final ZkStateReader reader = fixture.reader; + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); // start another thread to constantly updating the state final AtomicBoolean stopMutatingThread = new AtomicBoolean(false); - final ZkStateWriter writer = new ZkStateWriter(reader, new Stats()); - final AtomicInteger updateCounts = new AtomicInteger(0); + final AtomicReference updateException = new AtomicReference<>(); executorService.submit( () -> { try { - ClusterState clusterState = readerRef.getClusterState(); + ClusterState clusterState = reader.getClusterState(); while (!stopMutatingThread.get()) { DocCollection collection = clusterState.getCollectionOrNull("c1"); int currentVersion = collection != null ? collection.getZNodeVersion() : 0; @@ -428,21 +380,12 @@ public void testWatchRaceCondition() throws Exception { return latch.getCount() == 0; }; reader.registerDocCollectionWatcher("c1", dummyWatcher); - latch.await(10, TimeUnit.SECONDS); + assertTrue("Missing expected collection updates after the wait", latch.await(10, TimeUnit.SECONDS)); reader.removeDocCollectionWatcher("c1", dummyWatcher); - boolean refLazilyLoaded = false; - for (int j = 0; j < 10; j++) { - if (reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()) { - refLazilyLoaded = true; // it should eventually be lazily loaded - break; - } - int attempt = j + 1; - log.info("ref is not lazily loaded yet. Attempt : {}", attempt); - - TimeUnit.MILLISECONDS.sleep(100); - } - assert (refLazilyLoaded); + TimeOut timeOut = new TimeOut(1000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); + timeOut.waitFor("The ref is not lazily loaded after waiting", + () -> reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); } stopMutatingThread.set(true); @@ -451,9 +394,7 @@ public void testWatchRaceCondition() throws Exception { } } finally { - IOUtils.close(reader, zkClient); - executorService.awaitTermination(10, TimeUnit.SECONDS); - server.shutdown(); + ExecutorUtil.awaitTermination(executorService); } } } diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index a253f12490d..f7abfdb2ce4 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -279,45 +279,52 @@ private Set activeCollections() { } /** - * Updates the latest observed DocCollection (state) of the {@link StatefulCollectionWatch} if the - * collection is being watched + * Updates the latest observed DocCollection (state) of the {@link StatefulCollectionWatch} if + * the collection is being watched * * @param collection the collection name * @param newState the new DocCollection (state) observed * @return whether an active watch exists for such collection */ private boolean updateDocCollection(String collection, DocCollection newState) { - StatefulCollectionWatch finalWatch = computeIfPresent(collection, (col, watch) -> { - DocCollection oldState = watch.currentState; - if (oldState == null && newState == null) { - // OK, the collection not yet exist in ZK - } else if (oldState == null) { - if (log.isDebugEnabled()) { - log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); - } - watch.currentState = newState; - } else if (newState == null) { - log.debug("Removing cached collection state for [{}]", collection); - watch.currentState = null; - } else { // both new and old states are non-null - int oldCVersion = - oldState.getPerReplicaStates() == null ? -1 : oldState.getPerReplicaStates().cversion; - int newCVersion = - newState.getPerReplicaStates() == null ? -1 : newState.getPerReplicaStates().cversion; - if (oldState.getZNodeVersion() < newState.getZNodeVersion() - || oldCVersion < newCVersion) { - watch.currentState = newState; - if (log.isDebugEnabled()) { - log.debug( - "Updating data for [{}] from [{}] to [{}]", - collection, - oldState.getZNodeVersion(), - newState.getZNodeVersion()); - } - } - } - return watch; - }); + StatefulCollectionWatch finalWatch = + computeIfPresent( + collection, + (col, watch) -> { + DocCollection oldState = watch.currentState; + if (oldState == null && newState == null) { + // OK, the collection not yet exist in ZK + } else if (oldState == null) { + if (log.isDebugEnabled()) { + log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); + } + watch.currentState = newState; + } else if (newState == null) { + log.debug("Removing cached collection state for [{}]", collection); + watch.currentState = null; + } else { // both new and old states are non-null + int oldCVersion = + oldState.getPerReplicaStates() == null + ? -1 + : oldState.getPerReplicaStates().cversion; + int newCVersion = + newState.getPerReplicaStates() == null + ? -1 + : newState.getPerReplicaStates().cversion; + if (oldState.getZNodeVersion() < newState.getZNodeVersion() + || oldCVersion < newCVersion) { + watch.currentState = newState; + if (log.isDebugEnabled()) { + log.debug( + "Updating data for [{}] from [{}] to [{}]", + collection, + oldState.getZNodeVersion(), + newState.getZNodeVersion()); + } + } + } + return watch; + }); return finalWatch != null; } } From a2931669bdfaa4f057188ab998240c8fe45b6dde Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 30 Jun 2022 18:38:02 -0700 Subject: [PATCH 27/45] ./gradlew :solr:core:spotlessApply --- .../cloud/overseer/ZkStateReaderTest.java | 70 ++++++++++--------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 5d99bcbf1a8..e864f4b01ec 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -16,6 +16,20 @@ */ package org.apache.solr.cloud.overseer; +import java.io.Closeable; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import org.apache.lucene.util.IOUtils; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.cloud.OverseerTest; @@ -36,21 +50,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Closeable; -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.nio.file.Path; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; - public class ZkStateReaderTest extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final long TIMEOUT = 30; @@ -61,21 +60,21 @@ private static class TestFixture implements Closeable { private final ZkStateReader reader; private final ZkStateWriter writer; - private TestFixture(ZkTestServer server, SolrZkClient zkClient, ZkStateReader reader, ZkStateWriter writer) { + private TestFixture( + ZkTestServer server, SolrZkClient zkClient, ZkStateReader reader, ZkStateWriter writer) { this.server = server; this.zkClient = zkClient; this.reader = reader; this.writer = writer; } - @Override public void close() throws IOException { IOUtils.close(reader, zkClient); try { server.shutdown(); } catch (InterruptedException e) { - //ok. Shutting down anyway + // ok. Shutting down anyway } } } @@ -84,7 +83,8 @@ private static TestFixture setupTestFixture(String testPrefix) throws Exception Path zkDir = createTempDir("testExternalCollectionWatchedNotWatched"); ZkTestServer server = new ZkTestServer(zkDir); server.run(); - SolrZkClient zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); + SolrZkClient zkClient = + new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT); ZkController.createClusterZkNodes(zkClient); ZkStateReader reader = new ZkStateReader(zkClient); @@ -96,7 +96,7 @@ private static TestFixture setupTestFixture(String testPrefix) throws Exception } public void testExternalCollectionWatchedNotWatched() throws Exception { - try (TestFixture fixture = setupTestFixture("testExternalCollectionWatchedNotWatched")){ + try (TestFixture fixture = setupTestFixture("testExternalCollectionWatchedNotWatched")) { ZkStateWriter writer = fixture.writer; ZkStateReader reader = fixture.reader; fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); @@ -125,7 +125,7 @@ public void testExternalCollectionWatchedNotWatched() throws Exception { } public void testCollectionStateWatcherCaching() throws Exception { - try (TestFixture fixture = setupTestFixture("testCollectionStateWatcherCaching")){ + try (TestFixture fixture = setupTestFixture("testCollectionStateWatcherCaching")) { ZkStateWriter writer = fixture.writer; ZkStateReader reader = fixture.reader; @@ -141,7 +141,8 @@ public void testCollectionStateWatcherCaching() throws Exception { ZkWriteCommand wc = new ZkWriteCommand("c1", state); writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); writer.writePendingUpdates(); - assertTrue(fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + assertTrue( + fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); reader.waitForState( "c1", 1, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionState != null); @@ -166,10 +167,8 @@ public void testCollectionStateWatcherCaching() throws Exception { } } - - public void testWatchedCollectionCreation() throws Exception { - try (TestFixture fixture = setupTestFixture("testWatchedCollectionCreation")){ + try (TestFixture fixture = setupTestFixture("testWatchedCollectionCreation")) { ZkStateWriter writer = fixture.writer; ZkStateReader reader = fixture.reader; @@ -196,7 +195,8 @@ public void testWatchedCollectionCreation() throws Exception { writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); writer.writePendingUpdates(); - assertTrue(fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + assertTrue( + fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); // reader.forceUpdateCollection("c1"); reader.waitForState("c1", TIMEOUT, TimeUnit.SECONDS, (n, c) -> c != null); @@ -207,7 +207,7 @@ public void testWatchedCollectionCreation() throws Exception { } public void testForciblyRefreshAllClusterState() throws Exception { - try (TestFixture fixture = setupTestFixture("testForciblyRefreshAllClusterState")){ + try (TestFixture fixture = setupTestFixture("testForciblyRefreshAllClusterState")) { ZkStateWriter writer = fixture.writer; ZkStateReader reader = fixture.reader; @@ -230,7 +230,8 @@ public void testForciblyRefreshAllClusterState() throws Exception { writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); writer.writePendingUpdates(); - assertTrue(fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + assertTrue( + fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); reader.forciblyRefreshAllClusterStateSlow(); ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); @@ -284,7 +285,7 @@ public void testForciblyRefreshAllClusterState() throws Exception { } public void testGetCurrentCollections() throws Exception { - try (TestFixture fixture = setupTestFixture("testGetCurrentCollections")){ + try (TestFixture fixture = setupTestFixture("testGetCurrentCollections")) { ZkStateWriter writer = fixture.writer; ZkStateReader reader = fixture.reader; @@ -333,7 +334,7 @@ public void testWatchRaceCondition() throws Exception { ExecutorUtil.newMDCAwareSingleThreadExecutor( new SolrNamedThreadFactory("zkStateReaderTest")); - try (TestFixture fixture = setupTestFixture("testWatchRaceCondition")){ + try (TestFixture fixture = setupTestFixture("testWatchRaceCondition")) { ZkStateWriter writer = fixture.writer; final ZkStateReader reader = fixture.reader; fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); @@ -380,12 +381,15 @@ public void testWatchRaceCondition() throws Exception { return latch.getCount() == 0; }; reader.registerDocCollectionWatcher("c1", dummyWatcher); - assertTrue("Missing expected collection updates after the wait", latch.await(10, TimeUnit.SECONDS)); + assertTrue( + "Missing expected collection updates after the wait", + latch.await(10, TimeUnit.SECONDS)); reader.removeDocCollectionWatcher("c1", dummyWatcher); TimeOut timeOut = new TimeOut(1000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); - timeOut.waitFor("The ref is not lazily loaded after waiting", - () -> reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); + timeOut.waitFor( + "The ref is not lazily loaded after waiting", + () -> reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); } stopMutatingThread.set(true); From 658d5b74f8ee825873fa0bc8abab7791c9de5fcd Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Wed, 6 Jul 2022 12:10:59 -0700 Subject: [PATCH 28/45] Refactoring after review. Creating a new branch as this seems to fail unit test cases in ZkStateReaderTest quite consistently --- .../solr/common/cloud/ZkStateReader.java | 54 ++++++++++++------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index f7abfdb2ce4..c9f5f1a8deb 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -42,6 +42,7 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiFunction; import java.util.function.Predicate; import java.util.function.UnaryOperator; import java.util.stream.Collectors; @@ -249,8 +250,8 @@ public boolean canBeRemoved() { * *

Each watcher DocCollectionWatch also contains the latest DocCollection (state) observed */ - private static class DocCollectionWatches - extends ConcurrentHashMap> { + private static class DocCollectionWatches { + private static final ConcurrentHashMap> statefulWatchesByCollectionName = new ConcurrentHashMap<>(); /** * Gets the DocCollection (state) of the collection which the corresponding watch last observed @@ -260,7 +261,7 @@ private static class DocCollectionWatches * collection. */ private DocCollection getDocCollection(String collection) { - StatefulCollectionWatch watch = get(collection); + StatefulCollectionWatch watch = statefulWatchesByCollectionName.get(collection); return watch != null ? watch.currentState : null; } @@ -270,7 +271,7 @@ private DocCollection getDocCollection(String collection) { * @return an immutable set of active collection names */ private Set activeCollections() { - return this.entrySet().stream() + return statefulWatchesByCollectionName.entrySet().stream() .filter( (Entry> entry) -> entry.getValue().currentState != null) @@ -278,6 +279,10 @@ private Set activeCollections() { .collect(Collectors.toUnmodifiableSet()); } + private Set watchedCollections() { + return Collections.unmodifiableSet(statefulWatchesByCollectionName.keySet()); + } + /** * Updates the latest observed DocCollection (state) of the {@link StatefulCollectionWatch} if * the collection is being watched @@ -287,13 +292,12 @@ private Set activeCollections() { * @return whether an active watch exists for such collection */ private boolean updateDocCollection(String collection, DocCollection newState) { - StatefulCollectionWatch finalWatch = - computeIfPresent( + StatefulCollectionWatch finalWatch = statefulWatchesByCollectionName.computeIfPresent( collection, (col, watch) -> { DocCollection oldState = watch.currentState; if (oldState == null && newState == null) { - // OK, the collection not yet exist in ZK + // OK, the collection not yet exist in ZK or already deleted } else if (oldState == null) { if (log.isDebugEnabled()) { log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); @@ -327,6 +331,17 @@ private boolean updateDocCollection(String collection, DocCollection newState) { }); return finalWatch != null; } + + /** + * Computes the new StatefulCollectionWatch by the supplied remappingFunction. + * @param collectionName collection name + * @param remappingFunction remaps the StatefulCollectionWatch. If this returns null, the associated StatefulCollectionWatch will be removed; otherwise, the returned value will be assigned to such collection + * @return the new StatefulCollectionWatch associated with the collection + */ + private StatefulCollectionWatch compute(String collectionName, + BiFunction, StatefulCollectionWatch> remappingFunction) { + return statefulWatchesByCollectionName.compute(collectionName, remappingFunction); + } } private static class StatefulCollectionWatch extends CollectionWatch { @@ -415,7 +430,7 @@ public void forciblyRefreshAllClusterStateSlow() throws KeeperException, Interru refreshLiveNodes(null); Set updatedCollections = new HashSet<>(); - for (String coll : collectionWatches.keySet()) { + for (String coll : collectionWatches.watchedCollections()) { DocCollection newState = fetchCollectionState(coll, null); if (collectionWatches.updateDocCollection(coll, newState)) { updatedCollections.add(coll); @@ -456,7 +471,7 @@ public void forceUpdateCollection(String collection) if (ref.get() != null) { return; } - } else if (collectionWatches.containsKey(collection)) { + } else if (collectionWatches.watchedCollections().contains(collection)) { // Exists as a watched collection, force a refresh. log.debug("Forcing refresh of watched collection state for {}", collection); DocCollection newState = fetchCollectionState(collection, null); @@ -611,13 +626,12 @@ private void constructState(Set changedCollections) { Map result = new LinkedHashMap<>(); // Add collections - for (Map.Entry> entry : - collectionWatches.entrySet()) { - if (entry.getValue().currentState != null) { + for (String watchedCollection : collectionWatches.watchedCollections()) { + DocCollection currentState = collectionWatches.getDocCollection(watchedCollection); + if (currentState != null) { // if the doc is null for the collection watch, then it should not be inserted into the // state - result.putIfAbsent( - entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentState)); + result.putIfAbsent(watchedCollection, new ClusterState.CollectionRef(currentState)); } } @@ -631,7 +645,7 @@ private void constructState(Set changedCollections) { if (log.isDebugEnabled()) { log.debug( "clusterStateSet: interesting [{}] watched [{}] lazy [{}] total [{}]", - collectionWatches.keySet().size(), + collectionWatches.watchedCollections().size(), collectionWatches.activeCollections().size(), lazyCollectionStates.keySet().size(), clusterState.getCollectionStates().size()); @@ -640,8 +654,8 @@ private void constructState(Set changedCollections) { if (log.isTraceEnabled()) { log.trace( "clusterStateSet: interesting [{}] watched [{}] lazy [{}] total [{}]", - collectionWatches.keySet(), - collectionWatches.activeCollections().size(), + collectionWatches.watchedCollections(), + collectionWatches.activeCollections(), lazyCollectionStates.keySet(), clusterState.getCollectionStates()); } @@ -655,7 +669,7 @@ private void constructState(Set changedCollections) { /** Refresh collections. */ private void refreshCollections() { - for (String coll : collectionWatches.keySet()) { + for (String coll : collectionWatches.watchedCollections()) { new StateWatcher(coll).refreshAndWatch(); } } @@ -685,7 +699,7 @@ private void refreshCollectionList(Watcher watcher) throws KeeperException, Inte this.lazyCollectionStates.keySet().retainAll(children); for (String coll : children) { // We will create an eager collection for any interesting collections, so don't add to lazy. - if (!collectionWatches.containsKey(coll)) { + if (!collectionWatches.watchedCollections().contains(coll)) { // Double check contains just to avoid allocating an object. LazyCollectionRef existing = lazyCollectionStates.get(coll); if (existing == null) { @@ -1362,7 +1376,7 @@ public void process(WatchedEvent event) { return; } - if (!collectionWatches.containsKey(coll)) { + if (!collectionWatches.watchedCollections().contains(coll)) { // This collection is no longer interesting, stop watching. log.debug("Uninteresting collection {}", coll); return; From 07730f5d44e79553d32656df3ae8e9fc5ef389a8 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Wed, 6 Jul 2022 12:51:00 -0700 Subject: [PATCH 29/45] Refactoring after review. Creating a new branch as this seems to fail unit test cases in ZkStateReaderTest quite consistently --- .../org/apache/solr/common/cloud/ZkStateReader.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index c9f5f1a8deb..6c89db61b32 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -283,6 +283,10 @@ private Set watchedCollections() { return Collections.unmodifiableSet(statefulWatchesByCollectionName.keySet()); } + private Set>> watchedCollectionEntries() { + return Collections.unmodifiableSet(statefulWatchesByCollectionName.entrySet()); + } + /** * Updates the latest observed DocCollection (state) of the {@link StatefulCollectionWatch} if * the collection is being watched @@ -626,12 +630,12 @@ private void constructState(Set changedCollections) { Map result = new LinkedHashMap<>(); // Add collections - for (String watchedCollection : collectionWatches.watchedCollections()) { - DocCollection currentState = collectionWatches.getDocCollection(watchedCollection); + for (Entry> entry : collectionWatches.watchedCollectionEntries()) { + DocCollection currentState = entry.getValue().currentState; if (currentState != null) { // if the doc is null for the collection watch, then it should not be inserted into the // state - result.putIfAbsent(watchedCollection, new ClusterState.CollectionRef(currentState)); + result.putIfAbsent(entry.getKey(), new ClusterState.CollectionRef(currentState)); } } From 90a3498c08aae764b19d663fef6e97103fb44efb Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Wed, 6 Jul 2022 12:57:32 -0700 Subject: [PATCH 30/45] Refactoring after review. Creating a new branch as this seems to fail unit test cases in ZkStateReaderTest quite consistently --- .../java/org/apache/solr/common/cloud/ZkStateReader.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 6c89db61b32..f92d0474de7 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -631,11 +631,11 @@ private void constructState(Set changedCollections) { // Add collections for (Entry> entry : collectionWatches.watchedCollectionEntries()) { - DocCollection currentState = entry.getValue().currentState; - if (currentState != null) { + if (entry.getValue().currentState != null) { // if the doc is null for the collection watch, then it should not be inserted into the // state - result.putIfAbsent(entry.getKey(), new ClusterState.CollectionRef(currentState)); + result.putIfAbsent( + entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentState)); } } From 3749dbc3ee3c6a6a9cf1c90d62f7ecd30e80c879 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Wed, 6 Jul 2022 17:43:02 -0700 Subject: [PATCH 31/45] Fixed incorrect static field --- .../src/java/org/apache/solr/common/cloud/ZkStateReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index f92d0474de7..bae7bcedd00 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -251,7 +251,7 @@ public boolean canBeRemoved() { *

Each watcher DocCollectionWatch also contains the latest DocCollection (state) observed */ private static class DocCollectionWatches { - private static final ConcurrentHashMap> statefulWatchesByCollectionName = new ConcurrentHashMap<>(); + private final ConcurrentHashMap> statefulWatchesByCollectionName = new ConcurrentHashMap<>(); /** * Gets the DocCollection (state) of the collection which the corresponding watch last observed From c0577d614c6350c80ce39320b3a67a3eb99c18ec Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Wed, 6 Jul 2022 17:58:21 -0700 Subject: [PATCH 32/45] Fixed unit test cases inti --- .../org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index e864f4b01ec..7bccf783eed 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -47,6 +47,8 @@ import org.apache.solr.common.util.TimeSource; import org.apache.solr.handler.admin.ConfigSetsHandler; import org.apache.solr.util.TimeOut; +import org.junit.After; +import org.junit.Before; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -80,7 +82,7 @@ public void close() throws IOException { } private static TestFixture setupTestFixture(String testPrefix) throws Exception { - Path zkDir = createTempDir("testExternalCollectionWatchedNotWatched"); + Path zkDir = createTempDir(testPrefix); ZkTestServer server = new ZkTestServer(zkDir); server.run(); SolrZkClient zkClient = From be48a38cfac3a9c2f7081d792aca72855e7819fe Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Wed, 6 Jul 2022 18:03:54 -0700 Subject: [PATCH 33/45] Change after code review --- .../test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 7bccf783eed..6a10531f998 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -281,7 +281,7 @@ public void testForciblyRefreshAllClusterState() throws Exception { ref = reader.getClusterState().getCollectionRef("c2"); assertNotNull(ref); - assert (ref.isLazilyLoaded()); // c2 should be lazily loaded as it's not watched + assertTrue("c2 should have been lazily loaded but is not!", ref.isLazilyLoaded()); // c2 should be lazily loaded as it's not watched assertEquals(0, ref.get().getZNodeVersion()); } } From ac0c077c5b56b489ee11eead724cb6e5365292b2 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Wed, 6 Jul 2022 18:07:55 -0700 Subject: [PATCH 34/45] ./gradlew :solr:core:spotlessApply and updated comment --- .../apache/solr/cloud/overseer/ZkStateReaderTest.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 6a10531f998..b5d66462813 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -47,8 +47,6 @@ import org.apache.solr.common.util.TimeSource; import org.apache.solr.handler.admin.ConfigSetsHandler; import org.apache.solr.util.TimeOut; -import org.junit.After; -import org.junit.Before; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -281,7 +279,9 @@ public void testForciblyRefreshAllClusterState() throws Exception { ref = reader.getClusterState().getCollectionRef("c2"); assertNotNull(ref); - assertTrue("c2 should have been lazily loaded but is not!", ref.isLazilyLoaded()); // c2 should be lazily loaded as it's not watched + assertTrue( + "c2 should have been lazily loaded but is not!", + ref.isLazilyLoaded()); // c2 should be lazily loaded as it's not watched assertEquals(0, ref.get().getZNodeVersion()); } } @@ -388,6 +388,9 @@ public void testWatchRaceCondition() throws Exception { latch.await(10, TimeUnit.SECONDS)); reader.removeDocCollectionWatcher("c1", dummyWatcher); + // cluster state might not be updated right the way from the removeDocCollectionWatcher call + // above as org.apache.solr.common.cloud.ZkStateReader.Notification might remove the watcher + // as well and might still be in the middle of updating the cluster state. TimeOut timeOut = new TimeOut(1000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); timeOut.waitFor( "The ref is not lazily loaded after waiting", From 3edf56117314fce53daa45d3d2bcb27c753c2bfa Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Wed, 6 Jul 2022 18:10:31 -0700 Subject: [PATCH 35/45] ./gradlew :solr:solrj:spotlessApply --- .../solr/common/cloud/ZkStateReader.java | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index bae7bcedd00..fa558134e53 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -251,7 +251,8 @@ public boolean canBeRemoved() { *

Each watcher DocCollectionWatch also contains the latest DocCollection (state) observed */ private static class DocCollectionWatches { - private final ConcurrentHashMap> statefulWatchesByCollectionName = new ConcurrentHashMap<>(); + private final ConcurrentHashMap> + statefulWatchesByCollectionName = new ConcurrentHashMap<>(); /** * Gets the DocCollection (state) of the collection which the corresponding watch last observed @@ -261,7 +262,8 @@ private static class DocCollectionWatches { * collection. */ private DocCollection getDocCollection(String collection) { - StatefulCollectionWatch watch = statefulWatchesByCollectionName.get(collection); + StatefulCollectionWatch watch = + statefulWatchesByCollectionName.get(collection); return watch != null ? watch.currentState : null; } @@ -283,7 +285,8 @@ private Set watchedCollections() { return Collections.unmodifiableSet(statefulWatchesByCollectionName.keySet()); } - private Set>> watchedCollectionEntries() { + private Set>> + watchedCollectionEntries() { return Collections.unmodifiableSet(statefulWatchesByCollectionName.entrySet()); } @@ -296,7 +299,8 @@ private Set>> watche * @return whether an active watch exists for such collection */ private boolean updateDocCollection(String collection, DocCollection newState) { - StatefulCollectionWatch finalWatch = statefulWatchesByCollectionName.computeIfPresent( + StatefulCollectionWatch finalWatch = + statefulWatchesByCollectionName.computeIfPresent( collection, (col, watch) -> { DocCollection oldState = watch.currentState; @@ -338,12 +342,20 @@ private boolean updateDocCollection(String collection, DocCollection newState) { /** * Computes the new StatefulCollectionWatch by the supplied remappingFunction. - * @param collectionName collection name - * @param remappingFunction remaps the StatefulCollectionWatch. If this returns null, the associated StatefulCollectionWatch will be removed; otherwise, the returned value will be assigned to such collection + * + * @param collectionName collection name + * @param remappingFunction remaps the StatefulCollectionWatch. If this returns null, the + * associated StatefulCollectionWatch will be removed; otherwise, the returned value will be + * assigned to such collection * @return the new StatefulCollectionWatch associated with the collection */ - private StatefulCollectionWatch compute(String collectionName, - BiFunction, StatefulCollectionWatch> remappingFunction) { + private StatefulCollectionWatch compute( + String collectionName, + BiFunction< + String, + StatefulCollectionWatch, + StatefulCollectionWatch> + remappingFunction) { return statefulWatchesByCollectionName.compute(collectionName, remappingFunction); } } @@ -630,12 +642,13 @@ private void constructState(Set changedCollections) { Map result = new LinkedHashMap<>(); // Add collections - for (Entry> entry : collectionWatches.watchedCollectionEntries()) { + for (Entry> entry : + collectionWatches.watchedCollectionEntries()) { if (entry.getValue().currentState != null) { // if the doc is null for the collection watch, then it should not be inserted into the // state result.putIfAbsent( - entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentState)); + entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentState)); } } From 24a05df012f1354ab210fa8e9a593c249f8254ad Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 7 Jul 2022 12:11:00 -0700 Subject: [PATCH 36/45] Changes after code review --- .../cloud/overseer/ZkStateReaderTest.java | 444 +++++++++--------- .../solr/common/cloud/ZkStateReader.java | 39 +- 2 files changed, 250 insertions(+), 233 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index b5d66462813..9da4a545417 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -47,6 +47,8 @@ import org.apache.solr.common.util.TimeSource; import org.apache.solr.handler.admin.ConfigSetsHandler; import org.apache.solr.util.TimeOut; +import org.junit.After; +import org.junit.Before; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -79,6 +81,22 @@ public void close() throws IOException { } } + private TestFixture fixture = null; + + @Before + public void setUp() throws Exception { + super.setUp(); + fixture = setupTestFixture(getTestName()); + } + + @After + public void tearDown() throws Exception { + if (fixture != null) { + fixture.close(); + } + super.tearDown(); + } + private static TestFixture setupTestFixture(String testPrefix) throws Exception { Path zkDir = createTempDir(testPrefix); ZkTestServer server = new ZkTestServer(zkDir); @@ -96,238 +114,225 @@ private static TestFixture setupTestFixture(String testPrefix) throws Exception } public void testExternalCollectionWatchedNotWatched() throws Exception { - try (TestFixture fixture = setupTestFixture("testExternalCollectionWatchedNotWatched")) { - ZkStateWriter writer = fixture.writer; - ZkStateReader reader = fixture.reader; - fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); - - // create new collection - ZkWriteCommand c1 = - new ZkWriteCommand( - "c1", - new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0)); - - writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(c1), null); - writer.writePendingUpdates(); - reader.forceUpdateCollection("c1"); - - assertTrue(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); - reader.registerCore("c1"); - assertFalse(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); - reader.unregisterCore("c1"); - assertTrue(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); - } + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + + // create new collection + ZkWriteCommand c1 = + new ZkWriteCommand( + "c1", + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0)); + + writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(c1), null); + writer.writePendingUpdates(); + reader.forceUpdateCollection("c1"); + + assertTrue(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); + reader.registerCore("c1"); + assertFalse(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); + reader.unregisterCore("c1"); + assertTrue(reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); } public void testCollectionStateWatcherCaching() throws Exception { - try (TestFixture fixture = setupTestFixture("testCollectionStateWatcherCaching")) { - ZkStateWriter writer = fixture.writer; - ZkStateReader reader = fixture.reader; - - fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); - - DocCollection state = - new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0); - ZkWriteCommand wc = new ZkWriteCommand("c1", state); - writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); - writer.writePendingUpdates(); - assertTrue( - fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); - reader.waitForState( - "c1", 1, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionState != null); - - Map props = new HashMap<>(); - props.put("x", "y"); - props.put(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME); - state = new DocCollection("c1", new HashMap<>(), props, DocRouter.DEFAULT, 0); - wc = new ZkWriteCommand("c1", state); - writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); - writer.writePendingUpdates(); - - boolean found = false; - TimeOut timeOut = new TimeOut(5, TimeUnit.SECONDS, TimeSource.NANO_TIME); - while (!timeOut.hasTimedOut()) { - DocCollection c1 = reader.getClusterState().getCollection("c1"); - if ("y".equals(c1.getStr("x"))) { - found = true; - break; - } + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; + + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + + DocCollection state = + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + ZkWriteCommand wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); + writer.writePendingUpdates(); + assertTrue(fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + reader.waitForState( + "c1", 1, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionState != null); + + Map props = new HashMap<>(); + props.put("x", "y"); + props.put(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME); + state = new DocCollection("c1", new HashMap<>(), props, DocRouter.DEFAULT, 0); + wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); + writer.writePendingUpdates(); + + boolean found = false; + TimeOut timeOut = new TimeOut(5, TimeUnit.SECONDS, TimeSource.NANO_TIME); + while (!timeOut.hasTimedOut()) { + DocCollection c1 = reader.getClusterState().getCollection("c1"); + if ("y".equals(c1.getStr("x"))) { + found = true; + break; } - assertTrue("Could not find updated property in collection c1 even after 5 seconds", found); } + assertTrue("Could not find updated property in collection c1 even after 5 seconds", found); } public void testWatchedCollectionCreation() throws Exception { - try (TestFixture fixture = setupTestFixture("testWatchedCollectionCreation")) { - ZkStateWriter writer = fixture.writer; - ZkStateReader reader = fixture.reader; - - reader.registerCore("c1"); - - // Initially there should be no c1 collection. - assertNull(reader.getClusterState().getCollectionRef("c1")); - - fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); - reader.forceUpdateCollection("c1"); - - // Still no c1 collection, despite a collection path. - assertNull(reader.getClusterState().getCollectionRef("c1")); - - // create new collection - DocCollection state = - new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0); - ZkWriteCommand wc = new ZkWriteCommand("c1", state); - writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); - writer.writePendingUpdates(); - - assertTrue( - fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); - - // reader.forceUpdateCollection("c1"); - reader.waitForState("c1", TIMEOUT, TimeUnit.SECONDS, (n, c) -> c != null); - ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); - assertNotNull(ref); - assertFalse(ref.isLazilyLoaded()); - } + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; + + reader.registerCore("c1"); + + // Initially there should be no c1 collection. + assertNull(reader.getClusterState().getCollectionRef("c1")); + + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + reader.forceUpdateCollection("c1"); + + // Still no c1 collection, despite a collection path. + assertNull(reader.getClusterState().getCollectionRef("c1")); + + // create new collection + DocCollection state = + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + ZkWriteCommand wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); + writer.writePendingUpdates(); + + assertTrue(fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + + // reader.forceUpdateCollection("c1"); + reader.waitForState("c1", TIMEOUT, TimeUnit.SECONDS, (n, c) -> c != null); + ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); + assertNotNull(ref); + assertFalse(ref.isLazilyLoaded()); } public void testForciblyRefreshAllClusterState() throws Exception { - try (TestFixture fixture = setupTestFixture("testForciblyRefreshAllClusterState")) { - ZkStateWriter writer = fixture.writer; - ZkStateReader reader = fixture.reader; - - reader.registerCore("c1"); // watching c1, so it should get non lazy reference - fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); - - reader.forciblyRefreshAllClusterStateSlow(); - // Initially there should be no c1 collection. - assertNull(reader.getClusterState().getCollectionRef("c1")); - - // create new collection - DocCollection state = - new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0); - ZkWriteCommand wc = new ZkWriteCommand("c1", state); - writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); - writer.writePendingUpdates(); - - assertTrue( - fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); - - reader.forciblyRefreshAllClusterStateSlow(); - ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); - assertNotNull(ref); - assertFalse(ref.isLazilyLoaded()); - assertEquals(0, ref.get().getZNodeVersion()); - - // update the collection - state = - new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - ref.get().getZNodeVersion()); - wc = new ZkWriteCommand("c1", state); - writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); - writer.writePendingUpdates(); - - reader.forciblyRefreshAllClusterStateSlow(); - ref = reader.getClusterState().getCollectionRef("c1"); - assertNotNull(ref); - assertFalse(ref.isLazilyLoaded()); - assertEquals(1, ref.get().getZNodeVersion()); - - // delete the collection c1, add a collection c2 that is NOT watched - ZkWriteCommand wc1 = new ZkWriteCommand("c1", null); - - fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); - state = - new DocCollection( - "c2", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0); - ZkWriteCommand wc2 = new ZkWriteCommand("c2", state); - - writer.enqueueUpdate(reader.getClusterState(), Arrays.asList(wc1, wc2), null); - writer.writePendingUpdates(); - - reader.forciblyRefreshAllClusterStateSlow(); - ref = reader.getClusterState().getCollectionRef("c1"); - assertNull(ref); - - ref = reader.getClusterState().getCollectionRef("c2"); - assertNotNull(ref); - assertTrue( - "c2 should have been lazily loaded but is not!", - ref.isLazilyLoaded()); // c2 should be lazily loaded as it's not watched - assertEquals(0, ref.get().getZNodeVersion()); - } + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; + + reader.registerCore("c1"); // watching c1, so it should get non lazy reference + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + + reader.forciblyRefreshAllClusterStateSlow(); + // Initially there should be no c1 collection. + assertNull(reader.getClusterState().getCollectionRef("c1")); + + // create new collection + DocCollection state = + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + ZkWriteCommand wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); + writer.writePendingUpdates(); + + assertTrue(fixture.zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true)); + + reader.forciblyRefreshAllClusterStateSlow(); + ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); + assertNotNull(ref); + assertFalse(ref.isLazilyLoaded()); + assertEquals(0, ref.get().getZNodeVersion()); + + // update the collection + state = + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + ref.get().getZNodeVersion()); + wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(reader.getClusterState(), Collections.singletonList(wc), null); + writer.writePendingUpdates(); + + reader.forciblyRefreshAllClusterStateSlow(); + ref = reader.getClusterState().getCollectionRef("c1"); + assertNotNull(ref); + assertFalse(ref.isLazilyLoaded()); + assertEquals(1, ref.get().getZNodeVersion()); + + // delete the collection c1, add a collection c2 that is NOT watched + ZkWriteCommand wc1 = new ZkWriteCommand("c1", null); + + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); + state = + new DocCollection( + "c2", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + ZkWriteCommand wc2 = new ZkWriteCommand("c2", state); + + writer.enqueueUpdate(reader.getClusterState(), Arrays.asList(wc1, wc2), null); + writer.writePendingUpdates(); + + reader.forciblyRefreshAllClusterStateSlow(); + ref = reader.getClusterState().getCollectionRef("c1"); + assertNull(ref); + + ref = reader.getClusterState().getCollectionRef("c2"); + assertNotNull(ref); + assertTrue( + "c2 should have been lazily loaded but is not!", + ref.isLazilyLoaded()); // c2 should be lazily loaded as it's not watched + assertEquals(0, ref.get().getZNodeVersion()); } public void testGetCurrentCollections() throws Exception { - try (TestFixture fixture = setupTestFixture("testGetCurrentCollections")) { - ZkStateWriter writer = fixture.writer; - ZkStateReader reader = fixture.reader; - - reader.registerCore("c1"); // listen to c1. not yet exist - fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); - reader.forceUpdateCollection("c1"); - Set currentCollections = reader.getCurrentCollections(); - assertEquals(0, currentCollections.size()); // no active collections yet - - // now create both c1 (watched) and c2 (not watched) - DocCollection state1 = - new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0); - ZkWriteCommand wc1 = new ZkWriteCommand("c1", state1); - DocCollection state2 = - new DocCollection( - "c2", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0); - - // do not listen to c2 - fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); - ZkWriteCommand wc2 = new ZkWriteCommand("c2", state2); - - writer.enqueueUpdate(reader.getClusterState(), Arrays.asList(wc1, wc2), null); - writer.writePendingUpdates(); - - reader.forceUpdateCollection("c1"); - reader.forceUpdateCollection("c2"); - currentCollections = - reader.getCurrentCollections(); // should detect both collections (c1 watched, c2 lazy - // loaded) - assertEquals(2, currentCollections.size()); - } + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; + + reader.registerCore("c1"); // listen to c1. not yet exist + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + reader.forceUpdateCollection("c1"); + Set currentCollections = reader.getCurrentCollections(); + assertEquals(0, currentCollections.size()); // no active collections yet + + // now create both c1 (watched) and c2 (not watched) + DocCollection state1 = + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + ZkWriteCommand wc1 = new ZkWriteCommand("c1", state1); + DocCollection state2 = + new DocCollection( + "c2", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + + // do not listen to c2 + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c2", true); + ZkWriteCommand wc2 = new ZkWriteCommand("c2", state2); + + writer.enqueueUpdate(reader.getClusterState(), Arrays.asList(wc1, wc2), null); + writer.writePendingUpdates(); + + reader.forceUpdateCollection("c1"); + reader.forceUpdateCollection("c2"); + currentCollections = + reader.getCurrentCollections(); // should detect both collections (c1 watched, c2 lazy + // loaded) + assertEquals(2, currentCollections.size()); } public void testWatchRaceCondition() throws Exception { @@ -336,7 +341,7 @@ public void testWatchRaceCondition() throws Exception { ExecutorUtil.newMDCAwareSingleThreadExecutor( new SolrNamedThreadFactory("zkStateReaderTest")); - try (TestFixture fixture = setupTestFixture("testWatchRaceCondition")) { + try { ZkStateWriter writer = fixture.writer; final ZkStateReader reader = fixture.reader; fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); @@ -401,7 +406,6 @@ public void testWatchRaceCondition() throws Exception { if (updateException.get() != null) { throw (updateException.get()); } - } finally { ExecutorUtil.awaitTermination(executorService); } diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index fa558134e53..e4744664280 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -251,7 +251,7 @@ public boolean canBeRemoved() { *

Each watcher DocCollectionWatch also contains the latest DocCollection (state) observed */ private static class DocCollectionWatches { - private final ConcurrentHashMap> + private final ConcurrentHashMap statefulWatchesByCollectionName = new ConcurrentHashMap<>(); /** @@ -262,7 +262,7 @@ private static class DocCollectionWatches { * collection. */ private DocCollection getDocCollection(String collection) { - StatefulCollectionWatch watch = + StatefulCollectionWatch watch = statefulWatchesByCollectionName.get(collection); return watch != null ? watch.currentState : null; } @@ -275,17 +275,30 @@ private DocCollection getDocCollection(String collection) { private Set activeCollections() { return statefulWatchesByCollectionName.entrySet().stream() .filter( - (Entry> entry) -> + (Entry entry) -> entry.getValue().currentState != null) .map(Entry::getKey) .collect(Collectors.toUnmodifiableSet()); } + /** + * Gets the count of active collections (collections that exist) being watched + * + * @return the count of active collections + */ + private long activeCollectionCount() { + return statefulWatchesByCollectionName.entrySet().stream() + .filter( + (Entry entry) -> + entry.getValue().currentState != null) + .count(); + } + private Set watchedCollections() { return Collections.unmodifiableSet(statefulWatchesByCollectionName.keySet()); } - private Set>> + private Set> watchedCollectionEntries() { return Collections.unmodifiableSet(statefulWatchesByCollectionName.entrySet()); } @@ -299,7 +312,7 @@ private Set watchedCollections() { * @return whether an active watch exists for such collection */ private boolean updateDocCollection(String collection, DocCollection newState) { - StatefulCollectionWatch finalWatch = + StatefulCollectionWatch finalWatch = statefulWatchesByCollectionName.computeIfPresent( collection, (col, watch) -> { @@ -349,18 +362,18 @@ private boolean updateDocCollection(String collection, DocCollection newState) { * assigned to such collection * @return the new StatefulCollectionWatch associated with the collection */ - private StatefulCollectionWatch compute( + private StatefulCollectionWatch compute( String collectionName, BiFunction< String, - StatefulCollectionWatch, - StatefulCollectionWatch> + StatefulCollectionWatch, + StatefulCollectionWatch> remappingFunction) { return statefulWatchesByCollectionName.compute(collectionName, remappingFunction); } } - private static class StatefulCollectionWatch extends CollectionWatch { + private static class StatefulCollectionWatch extends CollectionWatch { private DocCollection currentState; } @@ -642,7 +655,7 @@ private void constructState(Set changedCollections) { Map result = new LinkedHashMap<>(); // Add collections - for (Entry> entry : + for (Entry entry : collectionWatches.watchedCollectionEntries()) { if (entry.getValue().currentState != null) { // if the doc is null for the collection watch, then it should not be inserted into the @@ -663,7 +676,7 @@ private void constructState(Set changedCollections) { log.debug( "clusterStateSet: interesting [{}] watched [{}] lazy [{}] total [{}]", collectionWatches.watchedCollections().size(), - collectionWatches.activeCollections().size(), + collectionWatches.activeCollectionCount(), lazyCollectionStates.keySet().size(), clusterState.getCollectionStates().size()); } @@ -1735,7 +1748,7 @@ public void registerCore(String collection) { (k, v) -> { if (v == null) { reconstructState.set(true); - v = new StatefulCollectionWatch<>(); + v = new StatefulCollectionWatch(); } v.coreRefCount++; return v; @@ -1819,7 +1832,7 @@ public void registerDocCollectionWatcher(String collection, DocCollectionWatcher collection, (k, v) -> { if (v == null) { - v = new StatefulCollectionWatch<>(); + v = new StatefulCollectionWatch(); watchSet.set(true); } v.stateWatchers.add(stateWatcher); From c39e2a977b6157eaea7ad334e507ad484f80e342 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 14 Jul 2022 13:04:22 -0700 Subject: [PATCH 37/45] Use delay injection in race condition unit test case instead of running 10k times --- .../cloud/overseer/ZkStateReaderTest.java | 51 ++++++++++--------- .../solr/common/cloud/ZkStateReader.java | 2 + .../solr/common/util/CommonTestInjection.java | 31 +++++++++++ 3 files changed, 59 insertions(+), 25 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 9da4a545417..2a0b3501c21 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -42,6 +42,7 @@ import org.apache.solr.common.cloud.DocRouter; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.util.CommonTestInjection; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.SolrNamedThreadFactory; import org.apache.solr.common.util.TimeSource; @@ -336,11 +337,10 @@ public void testGetCurrentCollections() throws Exception { } public void testWatchRaceCondition() throws Exception { - final int RUN_COUNT = 10000; ExecutorService executorService = ExecutorUtil.newMDCAwareSingleThreadExecutor( new SolrNamedThreadFactory("zkStateReaderTest")); - + CommonTestInjection.setDelay(1000); try { ZkStateWriter writer = fixture.writer; final ZkStateReader reader = fixture.reader; @@ -378,35 +378,36 @@ public void testWatchRaceCondition() throws Exception { }); executorService.shutdown(); - for (int i = 0; i < RUN_COUNT; i++) { - final CountDownLatch latch = new CountDownLatch(2); - - // remove itself on 2nd trigger - DocCollectionWatcher dummyWatcher = - collection -> { - latch.countDown(); - return latch.getCount() == 0; - }; - reader.registerDocCollectionWatcher("c1", dummyWatcher); - assertTrue( - "Missing expected collection updates after the wait", - latch.await(10, TimeUnit.SECONDS)); - reader.removeDocCollectionWatcher("c1", dummyWatcher); - - // cluster state might not be updated right the way from the removeDocCollectionWatcher call - // above as org.apache.solr.common.cloud.ZkStateReader.Notification might remove the watcher - // as well and might still be in the middle of updating the cluster state. - TimeOut timeOut = new TimeOut(1000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); - timeOut.waitFor( - "The ref is not lazily loaded after waiting", - () -> reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); - } + + final CountDownLatch latch = new CountDownLatch(2); + + // remove itself on 2nd trigger + DocCollectionWatcher dummyWatcher = + collection -> { + latch.countDown(); + return latch.getCount() == 0; + }; + reader.registerDocCollectionWatcher("c1", dummyWatcher); + assertTrue( + "Missing expected collection updates after the wait", + latch.await(10, TimeUnit.SECONDS)); + reader.removeDocCollectionWatcher("c1", dummyWatcher); + + // cluster state might not be updated right the way from the removeDocCollectionWatcher call + // above as org.apache.solr.common.cloud.ZkStateReader.Notification might remove the watcher + // as well and might still be in the middle of updating the cluster state. + TimeOut timeOut = new TimeOut(2000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); + timeOut.waitFor( + "The ref is not lazily loaded after waiting", + () -> reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); + stopMutatingThread.set(true); if (updateException.get() != null) { throw (updateException.get()); } } finally { + CommonTestInjection.reset(); ExecutorUtil.awaitTermination(executorService); } } diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index e4744664280..c40b383ca07 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -55,6 +55,7 @@ import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.CollectionAdminParams; import org.apache.solr.common.params.CoreAdminParams; +import org.apache.solr.common.util.CommonTestInjection; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.ObjectReleaseTracker; import org.apache.solr.common.util.Pair; @@ -2056,6 +2057,7 @@ public void removeDocCollectionWatcher(String collection, DocCollectionWatcher w if (v.canBeRemoved()) { lazyCollectionStates.put(collection, new LazyCollectionRef(collection)); reconstructState.set(true); + CommonTestInjection.injectDelay(); //To unit test race condition return null; } return v; diff --git a/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java b/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java index c837b895049..0374007db0e 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java @@ -17,6 +17,10 @@ package org.apache.solr.common.util; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.invoke.MethodHandles; import java.util.Map; /** @@ -25,11 +29,14 @@ * @lucene.internal */ public class CommonTestInjection { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static volatile Map additionalSystemProps = null; + private volatile static Integer delay = null; public static void reset() { additionalSystemProps = null; + delay = null; } public static void setAdditionalProps(Map additionalSystemProps) { @@ -39,4 +46,28 @@ public static void setAdditionalProps(Map additionalSystemProps) public static Map injectAdditionalProps() { return additionalSystemProps; } + + /** + * Set test delay (sleep) in unit of millisec + * @param delay delay in millisec, null to remove such delay + */ + public static void setDelay(Integer delay) { + CommonTestInjection.delay = delay; + } + + /** + * Inject an artificial delay(sleep) into the code + * @return true + */ + public static boolean injectDelay() { + if (delay != null) { + try { + log.info("Inserting artificial delay for {}ms", delay); + Thread.sleep(delay); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + return true; + } } From 0feb4e61bd02afdfff282b4495bf2bf8cf5050d3 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Thu, 14 Jul 2022 14:15:15 -0700 Subject: [PATCH 38/45] Further improved the unit test case --- .../cloud/overseer/ZkStateReaderTest.java | 22 ++++++++++++++----- .../solr/common/cloud/ZkStateReader.java | 14 ++++-------- .../solr/common/util/CommonTestInjection.java | 14 +++++++----- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 2a0b3501c21..77be21e338d 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -336,19 +336,25 @@ public void testGetCurrentCollections() throws Exception { assertEquals(2, currentCollections.size()); } + /** + * Simulates race condition that might arise when state updates triggered by watch notification + * contend with removal of collection watches. + * + *

Such race condition should no longer exist with the new code that uses a single map for both + * "collection watches" and "latest state of watched collection" + */ public void testWatchRaceCondition() throws Exception { ExecutorService executorService = ExecutorUtil.newMDCAwareSingleThreadExecutor( new SolrNamedThreadFactory("zkStateReaderTest")); CommonTestInjection.setDelay(1000); + final AtomicBoolean stopMutatingThread = new AtomicBoolean(false); try { ZkStateWriter writer = fixture.writer; final ZkStateReader reader = fixture.reader; fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); // start another thread to constantly updating the state - final AtomicBoolean stopMutatingThread = new AtomicBoolean(false); - final AtomicReference updateException = new AtomicReference<>(); executorService.submit( () -> { @@ -370,6 +376,7 @@ public void testWatchRaceCondition() throws Exception { ZkWriteCommand wc = new ZkWriteCommand("c1", state); writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); clusterState = writer.writePendingUpdates(); + TimeUnit.MILLISECONDS.sleep(100); } } catch (Exception e) { updateException.set(e); @@ -378,6 +385,11 @@ public void testWatchRaceCondition() throws Exception { }); executorService.shutdown(); + reader.waitForState( + "c1", + 10, + TimeUnit.SECONDS, + slices -> slices != null); // wait for the state to become available final CountDownLatch latch = new CountDownLatch(2); @@ -389,8 +401,7 @@ public void testWatchRaceCondition() throws Exception { }; reader.registerDocCollectionWatcher("c1", dummyWatcher); assertTrue( - "Missing expected collection updates after the wait", - latch.await(10, TimeUnit.SECONDS)); + "Missing expected collection updates after the wait", latch.await(10, TimeUnit.SECONDS)); reader.removeDocCollectionWatcher("c1", dummyWatcher); // cluster state might not be updated right the way from the removeDocCollectionWatcher call @@ -401,12 +412,11 @@ public void testWatchRaceCondition() throws Exception { "The ref is not lazily loaded after waiting", () -> reader.getClusterState().getCollectionRef("c1").isLazilyLoaded()); - - stopMutatingThread.set(true); if (updateException.get() != null) { throw (updateException.get()); } } finally { + stopMutatingThread.set(true); CommonTestInjection.reset(); ExecutorUtil.awaitTermination(executorService); } diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index c40b383ca07..fc064aad900 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -263,8 +263,7 @@ private static class DocCollectionWatches { * collection. */ private DocCollection getDocCollection(String collection) { - StatefulCollectionWatch watch = - statefulWatchesByCollectionName.get(collection); + StatefulCollectionWatch watch = statefulWatchesByCollectionName.get(collection); return watch != null ? watch.currentState : null; } @@ -299,8 +298,7 @@ private Set watchedCollections() { return Collections.unmodifiableSet(statefulWatchesByCollectionName.keySet()); } - private Set> - watchedCollectionEntries() { + private Set> watchedCollectionEntries() { return Collections.unmodifiableSet(statefulWatchesByCollectionName.entrySet()); } @@ -365,11 +363,7 @@ private boolean updateDocCollection(String collection, DocCollection newState) { */ private StatefulCollectionWatch compute( String collectionName, - BiFunction< - String, - StatefulCollectionWatch, - StatefulCollectionWatch> - remappingFunction) { + BiFunction remappingFunction) { return statefulWatchesByCollectionName.compute(collectionName, remappingFunction); } } @@ -2057,7 +2051,7 @@ public void removeDocCollectionWatcher(String collection, DocCollectionWatcher w if (v.canBeRemoved()) { lazyCollectionStates.put(collection, new LazyCollectionRef(collection)); reconstructState.set(true); - CommonTestInjection.injectDelay(); //To unit test race condition + CommonTestInjection.injectDelay(); // To unit test race condition return null; } return v; diff --git a/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java b/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java index 0374007db0e..7373c883041 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java @@ -17,11 +17,10 @@ package org.apache.solr.common.util; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.lang.invoke.MethodHandles; import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Allows random faults to be injected in running code during test runs across all solr packages. @@ -32,7 +31,7 @@ public class CommonTestInjection { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static volatile Map additionalSystemProps = null; - private volatile static Integer delay = null; + private static volatile Integer delay = null; public static void reset() { additionalSystemProps = null; @@ -49,6 +48,7 @@ public static Map injectAdditionalProps() { /** * Set test delay (sleep) in unit of millisec + * * @param delay delay in millisec, null to remove such delay */ public static void setDelay(Integer delay) { @@ -57,13 +57,15 @@ public static void setDelay(Integer delay) { /** * Inject an artificial delay(sleep) into the code - * @return true + * + * @return true */ public static boolean injectDelay() { if (delay != null) { try { - log.info("Inserting artificial delay for {}ms", delay); + log.info("Start: artificial delay for {}ms", delay); Thread.sleep(delay); + log.info("Finish: artificial delay for {}ms", delay); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } From 45181fb510248ad65d63a49c3c0cf843052c4b24 Mon Sep 17 00:00:00 2001 From: patsonluk Date: Thu, 14 Jul 2022 15:57:49 -0700 Subject: [PATCH 39/45] Update solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java Co-authored-by: Houston Putman --- .../solr/common/cloud/ZkStateReader.java | 80 ++++++++++--------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index fc064aad900..e059f86a843 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -308,48 +308,50 @@ private Set> watchedCollectionEntries() { * * @param collection the collection name * @param newState the new DocCollection (state) observed - * @return whether an active watch exists for such collection + * @return whether the state has changed for the watched collection */ private boolean updateDocCollection(String collection, DocCollection newState) { - StatefulCollectionWatch finalWatch = - statefulWatchesByCollectionName.computeIfPresent( - collection, - (col, watch) -> { - DocCollection oldState = watch.currentState; - if (oldState == null && newState == null) { - // OK, the collection not yet exist in ZK or already deleted - } else if (oldState == null) { - if (log.isDebugEnabled()) { - log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); - } - watch.currentState = newState; - } else if (newState == null) { - log.debug("Removing cached collection state for [{}]", collection); - watch.currentState = null; - } else { // both new and old states are non-null - int oldCVersion = - oldState.getPerReplicaStates() == null - ? -1 - : oldState.getPerReplicaStates().cversion; - int newCVersion = - newState.getPerReplicaStates() == null - ? -1 - : newState.getPerReplicaStates().cversion; - if (oldState.getZNodeVersion() < newState.getZNodeVersion() - || oldCVersion < newCVersion) { - watch.currentState = newState; - if (log.isDebugEnabled()) { - log.debug( - "Updating data for [{}] from [{}] to [{}]", - collection, - oldState.getZNodeVersion(), - newState.getZNodeVersion()); - } - } + AtomicBoolean stateHasChanged = new AtomicBoolean(false); + statefulWatchesByCollectionName.computeIfPresent( + collection, + (col, watch) -> { + DocCollection oldState = watch.currentState; + if (oldState == null && newState == null) { + // OK, the collection not yet exist in ZK or already deleted + } else if (oldState == null) { + if (log.isDebugEnabled()) { + log.debug("Add data for [{}] ver [{}]", collection, newState.getZNodeVersion()); + } + watch.currentState = newState; + } else if (newState == null) { + log.debug("Removing cached collection state for [{}]", collection); + watch.currentState = null; + } else { // both new and old states are non-null + int oldCVersion = + oldState.getPerReplicaStates() == null + ? -1 + : oldState.getPerReplicaStates().cversion; + int newCVersion = + newState.getPerReplicaStates() == null + ? -1 + : newState.getPerReplicaStates().cversion; + if (oldState.getZNodeVersion() < newState.getZNodeVersion() + || oldCVersion < newCVersion) { + watch.currentState = newState; + if (log.isDebugEnabled()) { + log.debug( + "Updating data for [{}] from [{}] to [{}]", + collection, + oldState.getZNodeVersion(), + newState.getZNodeVersion()); } - return watch; - }); - return finalWatch != null; + } + } + stateHasChanged.set(oldState != watch.currentState); + return watch; + }); + + return stateHasChanged.get(); } /** From c0dfcf8cce3eea4ed3eadd4f5fd50c331d7c4644 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 15 Jul 2022 10:23:14 -0700 Subject: [PATCH 40/45] Added comment for concurrent access concerns --- .../java/org/apache/solr/common/cloud/ZkStateReader.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index e059f86a843..0efb6ce423d 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -294,6 +294,10 @@ private long activeCollectionCount() { .count(); } + /** + * Gets a Set of watched collection names. The returned value is thread-safe and unmodifiable. + * @return Set of watched collection names + */ private Set watchedCollections() { return Collections.unmodifiableSet(statefulWatchesByCollectionName.keySet()); } @@ -456,6 +460,11 @@ public void forciblyRefreshAllClusterStateSlow() throws KeeperException, Interru refreshLiveNodes(null); Set updatedCollections = new HashSet<>(); + + //Iterate through the actively watched collections. Take note that the returned watched collections + //might change during the iteration, but it should not throw exception as it's thread-safe. + //If such set is modified elsewhere during the iteration, the code logic should still handle such + //missing/extra collection w/o issues. for (String coll : collectionWatches.watchedCollections()) { DocCollection newState = fetchCollectionState(coll, null); if (collectionWatches.updateDocCollection(coll, newState)) { From 14afab4803e441972d415dcc14158310f6fafd9d Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 15 Jul 2022 10:51:10 -0700 Subject: [PATCH 41/45] Changes after code review --- .../org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 6 +++--- .../java/org/apache/solr/common/cloud/ZkStateReader.java | 6 +++--- .../org/apache/solr/common/util/CommonTestInjection.java | 3 ++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 77be21e338d..50cfba04167 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -330,9 +330,9 @@ public void testGetCurrentCollections() throws Exception { reader.forceUpdateCollection("c1"); reader.forceUpdateCollection("c2"); - currentCollections = - reader.getCurrentCollections(); // should detect both collections (c1 watched, c2 lazy - // loaded) + + // should detect both collections (c1 watched, c2 lazy loaded) + currentCollections = reader.getCurrentCollections(); assertEquals(2, currentCollections.size()); } diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 0efb6ce423d..138b470fea2 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -366,6 +366,7 @@ private boolean updateDocCollection(String collection, DocCollection newState) { * associated StatefulCollectionWatch will be removed; otherwise, the returned value will be * assigned to such collection * @return the new StatefulCollectionWatch associated with the collection + * @see ConcurrentHashMap#compute(Object, BiFunction) */ private StatefulCollectionWatch compute( String collectionName, @@ -666,8 +667,7 @@ private void constructState(Set changedCollections) { if (entry.getValue().currentState != null) { // if the doc is null for the collection watch, then it should not be inserted into the // state - result.putIfAbsent( - entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentState)); + result.put(entry.getKey(), new ClusterState.CollectionRef(entry.getValue().currentState)); } } @@ -2062,7 +2062,7 @@ public void removeDocCollectionWatcher(String collection, DocCollectionWatcher w if (v.canBeRemoved()) { lazyCollectionStates.put(collection, new LazyCollectionRef(collection)); reconstructState.set(true); - CommonTestInjection.injectDelay(); // To unit test race condition + assert CommonTestInjection.injectDelay(); // To unit test race condition return null; } return v; diff --git a/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java b/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java index 7373c883041..e1d7caa8dd9 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/CommonTestInjection.java @@ -65,9 +65,10 @@ public static boolean injectDelay() { try { log.info("Start: artificial delay for {}ms", delay); Thread.sleep(delay); - log.info("Finish: artificial delay for {}ms", delay); } catch (InterruptedException e) { Thread.currentThread().interrupt(); + } finally { + log.info("Finish: artificial delay for {}ms", delay); } } return true; From 7bad9243fb8b93fb9283cbb27769bee5b97cec2a Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 15 Jul 2022 15:11:40 -0700 Subject: [PATCH 42/45] Added extra unit test case on zk version handling. --- .../cloud/overseer/ZkStateReaderTest.java | 104 ++++++++++++++++++ .../solr/common/cloud/ZkStateReader.java | 12 +- 2 files changed, 111 insertions(+), 5 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 50cfba04167..50cc1a950a8 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -40,6 +40,9 @@ import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.DocCollectionWatcher; import org.apache.solr.common.cloud.DocRouter; +import org.apache.solr.common.cloud.PerReplicaStates; +import org.apache.solr.common.cloud.PerReplicaStatesOps; +import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.util.CommonTestInjection; @@ -217,6 +220,107 @@ public void testWatchedCollectionCreation() throws Exception { assertFalse(ref.isLazilyLoaded()); } + /** + * Verifies that znode and child versions are correct and version changes trigger cluster state updates + */ + public void testNodeVersion() throws Exception { + ZkStateWriter writer = fixture.writer; + ZkStateReader reader = fixture.reader; + + fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); + + ClusterState clusterState = reader.getClusterState(); + // create new collection + DocCollection state = + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); + ZkWriteCommand wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); + clusterState = writer.writePendingUpdates(); + + //have to register it here after the updates, otherwise the child node watch will not be inserted + reader.registerCore("c1"); + + TimeOut timeOut = new TimeOut(5000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); + timeOut.waitFor( + "Timeout on waiting for c1 to show up in cluster state", + () -> reader.getClusterState().getCollectionRef("c1") != null); + + ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); + assertNotNull(ref); + assertNotNull(ref.get()); + assertFalse(ref.isLazilyLoaded()); + assertEquals(0, ref.get().getZNodeVersion()); + assertEquals(-1, ref.get().getChildNodesVersion()); + + DocCollection collection = ref.get(); + PerReplicaStates prs = PerReplicaStates.fetch(collection.getZNode(), fixture.zkClient, collection.getPerReplicaStates()); + PerReplicaStatesOps.addReplica("r1", Replica.State.DOWN, false, prs).persist(collection.getZNode(), fixture.zkClient); + timeOut.waitFor( + "Timeout on waiting for c1 updated to have PRS state r1", + () -> { + DocCollection c = reader.getCollection("c1"); + return c.getPerReplicaStates() != null && c.getPerReplicaStates().get("r1") != null && c.getPerReplicaStates().get("r1").state == Replica.State.DOWN; + }); + + ref = reader.getClusterState().getCollectionRef("c1"); + assertEquals(0, ref.get().getZNodeVersion()); //no change in Znode version + assertEquals(1, ref.get().getChildNodesVersion()); //but child version should be 1 now + + prs = ref.get().getPerReplicaStates(); + PerReplicaStatesOps.flipState("r1", Replica.State.ACTIVE, prs).persist(collection.getZNode(), fixture.zkClient); + timeOut.waitFor( + "Timeout on waiting for c1 updated to have PRS state r1 marked as DOWN", + () -> reader.getCollection("c1").getPerReplicaStates().get("r1").state == Replica.State.ACTIVE); + + + ref = reader.getClusterState().getCollectionRef("c1"); + assertEquals(0, ref.get().getZNodeVersion()); //no change in Znode version + assertEquals(3, ref.get().getChildNodesVersion()); //but child version should be 3 now (1 del + 1 add) + + //now delete the collection + wc = new ZkWriteCommand("c1", null); + writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); + clusterState = writer.writePendingUpdates(); + timeOut.waitFor( + "Timeout on waiting for c1 to be removed from cluster state", + () -> reader.getClusterState().getCollectionRef("c1").get() == null); + + reader.unregisterCore("c1"); + //re-add the same collection + wc = new ZkWriteCommand("c1", state); + writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); + clusterState = writer.writePendingUpdates(); + reader.registerCore("c1"); //re-register, otherwise the child watch would be missing from collection deletion + + // reader.forceUpdateCollection("c1"); + timeOut.waitFor( + "Timeout on waiting for c1 to show up in cluster state again", + () -> reader.getClusterState().getCollectionRef("c1") != null && reader.getClusterState().getCollectionRef("c1").get() != null); + ref = reader.getClusterState().getCollectionRef("c1"); + assertFalse(ref.isLazilyLoaded()); + assertEquals(0, ref.get().getZNodeVersion()); + assertEquals(-1, ref.get().getChildNodesVersion()); //child node version is reset + + //re-add PRS + collection = ref.get(); + prs = PerReplicaStates.fetch(collection.getZNode(), fixture.zkClient, collection.getPerReplicaStates()); + PerReplicaStatesOps.addReplica("r1", Replica.State.DOWN, false, prs).persist(collection.getZNode(), fixture.zkClient); + timeOut.waitFor( + "Timeout on waiting for c1 updated to have PRS state r1", + () -> { + DocCollection c = reader.getCollection("c1"); + return c.getPerReplicaStates() != null && c.getPerReplicaStates().get("r1") != null && c.getPerReplicaStates().get("r1").state == Replica.State.DOWN; + }); + + ref = reader.getClusterState().getCollectionRef("c1"); + assertEquals(1, ref.get().getChildNodesVersion()); //child version should be reset since the state.json node was deleted and re-created + } + public void testForciblyRefreshAllClusterState() throws Exception { ZkStateWriter writer = fixture.writer; ZkStateReader reader = fixture.reader; diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 138b470fea2..353500581c2 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -296,6 +296,7 @@ private long activeCollectionCount() { /** * Gets a Set of watched collection names. The returned value is thread-safe and unmodifiable. + * * @return Set of watched collection names */ private Set watchedCollections() { @@ -366,7 +367,7 @@ private boolean updateDocCollection(String collection, DocCollection newState) { * associated StatefulCollectionWatch will be removed; otherwise, the returned value will be * assigned to such collection * @return the new StatefulCollectionWatch associated with the collection - * @see ConcurrentHashMap#compute(Object, BiFunction) + * @see ConcurrentHashMap#compute(Object, BiFunction) */ private StatefulCollectionWatch compute( String collectionName, @@ -462,10 +463,11 @@ public void forciblyRefreshAllClusterStateSlow() throws KeeperException, Interru Set updatedCollections = new HashSet<>(); - //Iterate through the actively watched collections. Take note that the returned watched collections - //might change during the iteration, but it should not throw exception as it's thread-safe. - //If such set is modified elsewhere during the iteration, the code logic should still handle such - //missing/extra collection w/o issues. + // Iterate through the actively watched collections. Take note that the returned watched + // collections might change during the iteration, but it should not throw exception as + // it's thread-safe. + // If such set is modified elsewhere during the iteration, the code logic should still + // handle such missing/extra collection w/o issues. for (String coll : collectionWatches.watchedCollections()) { DocCollection newState = fetchCollectionState(coll, null); if (collectionWatches.updateDocCollection(coll, newState)) { From 2ab50d4c7a1b8b4ec13e29035208e8c191b2a0f4 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 15 Jul 2022 15:14:59 -0700 Subject: [PATCH 43/45] ./gradlew :solr:core:spotlessApply --- .../cloud/overseer/ZkStateReaderTest.java | 106 +++++++++++------- 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 50cc1a950a8..45f96a1232a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -221,34 +221,36 @@ public void testWatchedCollectionCreation() throws Exception { } /** - * Verifies that znode and child versions are correct and version changes trigger cluster state updates + * Verifies that znode and child versions are correct and version changes trigger cluster state + * updates */ public void testNodeVersion() throws Exception { ZkStateWriter writer = fixture.writer; ZkStateReader reader = fixture.reader; fixture.zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true); - + ClusterState clusterState = reader.getClusterState(); // create new collection DocCollection state = - new DocCollection( - "c1", - new HashMap<>(), - Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), - DocRouter.DEFAULT, - 0); + new DocCollection( + "c1", + new HashMap<>(), + Map.of(ZkStateReader.CONFIGNAME_PROP, ConfigSetsHandler.DEFAULT_CONFIGSET_NAME), + DocRouter.DEFAULT, + 0); ZkWriteCommand wc = new ZkWriteCommand("c1", state); writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); clusterState = writer.writePendingUpdates(); - //have to register it here after the updates, otherwise the child node watch will not be inserted + // have to register it here after the updates, otherwise the child node watch will not be + // inserted reader.registerCore("c1"); TimeOut timeOut = new TimeOut(5000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); timeOut.waitFor( - "Timeout on waiting for c1 to show up in cluster state", - () -> reader.getClusterState().getCollectionRef("c1") != null); + "Timeout on waiting for c1 to show up in cluster state", + () -> reader.getClusterState().getCollectionRef("c1") != null); ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); assertNotNull(ref); @@ -258,67 +260,85 @@ public void testNodeVersion() throws Exception { assertEquals(-1, ref.get().getChildNodesVersion()); DocCollection collection = ref.get(); - PerReplicaStates prs = PerReplicaStates.fetch(collection.getZNode(), fixture.zkClient, collection.getPerReplicaStates()); - PerReplicaStatesOps.addReplica("r1", Replica.State.DOWN, false, prs).persist(collection.getZNode(), fixture.zkClient); + PerReplicaStates prs = + PerReplicaStates.fetch( + collection.getZNode(), fixture.zkClient, collection.getPerReplicaStates()); + PerReplicaStatesOps.addReplica("r1", Replica.State.DOWN, false, prs) + .persist(collection.getZNode(), fixture.zkClient); timeOut.waitFor( - "Timeout on waiting for c1 updated to have PRS state r1", - () -> { - DocCollection c = reader.getCollection("c1"); - return c.getPerReplicaStates() != null && c.getPerReplicaStates().get("r1") != null && c.getPerReplicaStates().get("r1").state == Replica.State.DOWN; - }); + "Timeout on waiting for c1 updated to have PRS state r1", + () -> { + DocCollection c = reader.getCollection("c1"); + return c.getPerReplicaStates() != null + && c.getPerReplicaStates().get("r1") != null + && c.getPerReplicaStates().get("r1").state == Replica.State.DOWN; + }); ref = reader.getClusterState().getCollectionRef("c1"); - assertEquals(0, ref.get().getZNodeVersion()); //no change in Znode version - assertEquals(1, ref.get().getChildNodesVersion()); //but child version should be 1 now + assertEquals(0, ref.get().getZNodeVersion()); // no change in Znode version + assertEquals(1, ref.get().getChildNodesVersion()); // but child version should be 1 now prs = ref.get().getPerReplicaStates(); - PerReplicaStatesOps.flipState("r1", Replica.State.ACTIVE, prs).persist(collection.getZNode(), fixture.zkClient); + PerReplicaStatesOps.flipState("r1", Replica.State.ACTIVE, prs) + .persist(collection.getZNode(), fixture.zkClient); timeOut.waitFor( - "Timeout on waiting for c1 updated to have PRS state r1 marked as DOWN", - () -> reader.getCollection("c1").getPerReplicaStates().get("r1").state == Replica.State.ACTIVE); - + "Timeout on waiting for c1 updated to have PRS state r1 marked as DOWN", + () -> + reader.getCollection("c1").getPerReplicaStates().get("r1").state + == Replica.State.ACTIVE); ref = reader.getClusterState().getCollectionRef("c1"); - assertEquals(0, ref.get().getZNodeVersion()); //no change in Znode version - assertEquals(3, ref.get().getChildNodesVersion()); //but child version should be 3 now (1 del + 1 add) + assertEquals(0, ref.get().getZNodeVersion()); // no change in Znode version + // but child version should be 3 now (1 del + 1 add) + assertEquals(3, ref.get().getChildNodesVersion()); - //now delete the collection + // now delete the collection wc = new ZkWriteCommand("c1", null); writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); clusterState = writer.writePendingUpdates(); timeOut.waitFor( - "Timeout on waiting for c1 to be removed from cluster state", - () -> reader.getClusterState().getCollectionRef("c1").get() == null); + "Timeout on waiting for c1 to be removed from cluster state", + () -> reader.getClusterState().getCollectionRef("c1").get() == null); reader.unregisterCore("c1"); - //re-add the same collection + // re-add the same collection wc = new ZkWriteCommand("c1", state); writer.enqueueUpdate(clusterState, Collections.singletonList(wc), null); clusterState = writer.writePendingUpdates(); - reader.registerCore("c1"); //re-register, otherwise the child watch would be missing from collection deletion + // re-register, otherwise the child watch would be missing from collection deletion + reader.registerCore("c1"); // reader.forceUpdateCollection("c1"); timeOut.waitFor( - "Timeout on waiting for c1 to show up in cluster state again", - () -> reader.getClusterState().getCollectionRef("c1") != null && reader.getClusterState().getCollectionRef("c1").get() != null); + "Timeout on waiting for c1 to show up in cluster state again", + () -> + reader.getClusterState().getCollectionRef("c1") != null + && reader.getClusterState().getCollectionRef("c1").get() != null); ref = reader.getClusterState().getCollectionRef("c1"); assertFalse(ref.isLazilyLoaded()); assertEquals(0, ref.get().getZNodeVersion()); - assertEquals(-1, ref.get().getChildNodesVersion()); //child node version is reset + assertEquals(-1, ref.get().getChildNodesVersion()); // child node version is reset - //re-add PRS + // re-add PRS collection = ref.get(); - prs = PerReplicaStates.fetch(collection.getZNode(), fixture.zkClient, collection.getPerReplicaStates()); - PerReplicaStatesOps.addReplica("r1", Replica.State.DOWN, false, prs).persist(collection.getZNode(), fixture.zkClient); + prs = + PerReplicaStates.fetch( + collection.getZNode(), fixture.zkClient, collection.getPerReplicaStates()); + PerReplicaStatesOps.addReplica("r1", Replica.State.DOWN, false, prs) + .persist(collection.getZNode(), fixture.zkClient); timeOut.waitFor( - "Timeout on waiting for c1 updated to have PRS state r1", - () -> { - DocCollection c = reader.getCollection("c1"); - return c.getPerReplicaStates() != null && c.getPerReplicaStates().get("r1") != null && c.getPerReplicaStates().get("r1").state == Replica.State.DOWN; - }); + "Timeout on waiting for c1 updated to have PRS state r1", + () -> { + DocCollection c = reader.getCollection("c1"); + return c.getPerReplicaStates() != null + && c.getPerReplicaStates().get("r1") != null + && c.getPerReplicaStates().get("r1").state == Replica.State.DOWN; + }); ref = reader.getClusterState().getCollectionRef("c1"); - assertEquals(1, ref.get().getChildNodesVersion()); //child version should be reset since the state.json node was deleted and re-created + + // child version should be reset since the state.json node was deleted and re-created + assertEquals(1, ref.get().getChildNodesVersion()); } public void testForciblyRefreshAllClusterState() throws Exception { From d3e6688f1c9d55d89fa50c5a45fb27682d3a8266 Mon Sep 17 00:00:00 2001 From: Patson Luk Date: Fri, 15 Jul 2022 15:21:44 -0700 Subject: [PATCH 44/45] More precise check condition --- .../org/apache/solr/cloud/overseer/ZkStateReaderTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java index 45f96a1232a..ac34bfaa1ae 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateReaderTest.java @@ -250,11 +250,11 @@ public void testNodeVersion() throws Exception { TimeOut timeOut = new TimeOut(5000, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); timeOut.waitFor( "Timeout on waiting for c1 to show up in cluster state", - () -> reader.getClusterState().getCollectionRef("c1") != null); + () -> + reader.getClusterState().getCollectionRef("c1") != null + && reader.getClusterState().getCollectionRef("c1").get() != null); ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1"); - assertNotNull(ref); - assertNotNull(ref.get()); assertFalse(ref.isLazilyLoaded()); assertEquals(0, ref.get().getZNodeVersion()); assertEquals(-1, ref.get().getChildNodesVersion()); From e3000412b39c9aa920a573e5b25305f96738d6e0 Mon Sep 17 00:00:00 2001 From: patsonluk Date: Wed, 20 Jul 2022 10:13:46 -0700 Subject: [PATCH 45/45] Update CHANGES.txt --- solr/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 99f8cfedb0f..253e097da2d 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -50,6 +50,9 @@ Improvements * SOLR-16225: Upgrade dependencies (Carrot2, HPPC) (Dawid Weiss) +* SOLR-16257: Improve ZkStateReader to avoid race condition between collectionWatches and watchedCollectionStates + (Patson Luk, Houston Putman, Mike Drob) + Optimizations --------------------- * SOLR-16120: Optimise hl.fl expansion. (Christine Poerschke, David Smiley, Mike Drob)