From 9c11b616554ac14630e3ae7400b15f69b62a954f Mon Sep 17 00:00:00 2001 From: David Smiley Date: Wed, 2 Aug 2023 17:55:59 -0400 Subject: [PATCH 1/2] SOLR-16415: ZK DistributedMap should reject slash in IDs. * asyncId should not have forward slashes in it * SizeLimitedDistributedMap should be resilient to directories (from before) --- .../org/apache/solr/cloud/DistributedMap.java | 23 ++++++++++++-- .../solr/cloud/SizeLimitedDistributedMap.java | 12 +++---- .../apache/solr/cloud/TestDistributedMap.java | 31 +++++++++++++++++++ .../cloud/TestSizeLimitedDistributedMap.java | 30 ++++++++++++++++++ .../pages/collections-api.adoc | 1 + 5 files changed, 88 insertions(+), 9 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/cloud/DistributedMap.java b/solr/core/src/java/org/apache/solr/cloud/DistributedMap.java index ece2589310a..b1b120d849c 100644 --- a/solr/core/src/java/org/apache/solr/cloud/DistributedMap.java +++ b/solr/core/src/java/org/apache/solr/cloud/DistributedMap.java @@ -16,6 +16,7 @@ */ package org.apache.solr.cloud; +import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -27,12 +28,17 @@ import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NodeExistsException; import org.apache.zookeeper.data.Stat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A distributed map. This supports basic map functions e.g. get, put, contains for interaction with * zk which don't have to be ordered i.e. DistributedQueue. */ public class DistributedMap { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + protected final String dir; protected SolrZkClient zookeeper; @@ -55,7 +61,14 @@ public DistributedMap(SolrZkClient zookeeper, String dir) { this.zookeeper = zookeeper; } + private void assertKeyFormat(String trackingId) { + if (trackingId == null || trackingId.length() == 0 || trackingId.contains("/")) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Unsupported key format: " + trackingId); + } + } + public void put(String trackingId, byte[] data) throws KeeperException, InterruptedException { + assertKeyFormat(trackingId); zookeeper.makePath( dir + "/" + PREFIX + trackingId, data, CreateMode.PERSISTENT, null, false, true); } @@ -63,10 +76,11 @@ public void put(String trackingId, byte[] data) throws KeeperException, Interrup /** * Puts an element in the map only if there isn't one with the same trackingId already * - * @return True if the the element was added. False if it wasn't (because the key already exists) + * @return True if the element was added. False if it wasn't (because the key already exists) */ public boolean putIfAbsent(String trackingId, byte[] data) throws KeeperException, InterruptedException { + assertKeyFormat(trackingId); try { zookeeper.makePath( dir + "/" + PREFIX + trackingId, data, CreateMode.PERSISTENT, null, true, true); @@ -95,10 +109,15 @@ public int size() throws KeeperException, InterruptedException { * not deleted exception an exception occurred while deleting */ public boolean remove(String trackingId) throws KeeperException, InterruptedException { + final var path = dir + "/" + PREFIX + trackingId; try { - zookeeper.delete(dir + "/" + PREFIX + trackingId, -1, true); + zookeeper.delete(path, -1, true); } catch (KeeperException.NoNodeException e) { return false; + } catch (KeeperException.NotEmptyException hack) { + // because dirty data before we enforced the rules on put() (trackingId shouldn't have slash) + log.warn("Cleaning malformed key ID starting with {}", path); + zookeeper.clean(path); } return true; } diff --git a/solr/core/src/java/org/apache/solr/cloud/SizeLimitedDistributedMap.java b/solr/core/src/java/org/apache/solr/cloud/SizeLimitedDistributedMap.java index ab495fffea8..c4c8923b218 100644 --- a/solr/core/src/java/org/apache/solr/cloud/SizeLimitedDistributedMap.java +++ b/solr/core/src/java/org/apache/solr/cloud/SizeLimitedDistributedMap.java @@ -91,13 +91,11 @@ protected boolean lessThan(Long a, Long b) { for (String child : children) { Long id = childToModificationZxid.get(child); if (id != null && id <= topElementMzxId) { - try { - zookeeper.delete(dir + "/" + child, -1, true); - if (onOverflowObserver != null) - onOverflowObserver.onChildDelete(child.substring(PREFIX.length())); - } catch (KeeperException.NoNodeException ignored) { - // this could happen if multiple threads try to clean the same map - } + String trackingId = child.substring(PREFIX.length()); + boolean removed = remove(trackingId); + if (removed && onOverflowObserver != null) { + onOverflowObserver.onChildDelete(trackingId); + } // else, probably multiple threads cleaning the queue simultaneously } } } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestDistributedMap.java b/solr/core/src/test/org/apache/solr/cloud/TestDistributedMap.java index 0b3325e96e5..aa5915f007b 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestDistributedMap.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestDistributedMap.java @@ -23,6 +23,7 @@ import java.util.concurrent.TimeUnit; import org.apache.commons.io.file.PathUtils; import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; @@ -218,6 +219,36 @@ public void testClear() throws KeeperException, InterruptedException { } } + public void testMalformed() throws InterruptedException, KeeperException { + try (SolrZkClient zkClient = + new SolrZkClient.Builder() + .withUrl(zkServer.getZkHost()) + .withTimeout(10000, TimeUnit.MILLISECONDS) + .build()) { + String path = getAndMakeInitialPath(zkClient); + DistributedMap map = createMap(zkClient, path); + expectThrows(SolrException.class, () -> map.put("has/slash", new byte[0])); + } + } + + public void testRemoveMalformed() throws InterruptedException, KeeperException { + try (SolrZkClient zkClient = + new SolrZkClient.Builder() + .withUrl(zkServer.getZkHost()) + .withTimeout(10000, TimeUnit.MILLISECONDS) + .build()) { + String path = getAndMakeInitialPath(zkClient); + // Add a "legacy" / malformed key + final var key = "slash/test/0"; + zkClient.makePath(path + "/" + DistributedMap.PREFIX + key, new byte[0], true); + + DistributedMap map = createMap(zkClient, path); + assertEquals(1, map.size()); + map.remove("slash"); + assertEquals(0, map.size()); + } + } + protected DistributedMap createMap(SolrZkClient zkClient, String path) { return new DistributedMap(zkClient, path); } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSizeLimitedDistributedMap.java b/solr/core/src/test/org/apache/solr/cloud/TestSizeLimitedDistributedMap.java index 493b7bf062b..a534e2e2a30 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestSizeLimitedDistributedMap.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestSizeLimitedDistributedMap.java @@ -25,9 +25,11 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.SolrNamedThreadFactory; +import org.apache.zookeeper.KeeperException; public class TestSizeLimitedDistributedMap extends TestDistributedMap { @@ -136,4 +138,32 @@ public void testConcurrentCleanup() throws Exception { protected DistributedMap createMap(SolrZkClient zkClient, String path) { return new SizeLimitedDistributedMap(zkClient, path, Overseer.NUM_RESPONSES_TO_STORE, null); } + + public void testCleanupForKeysWithSlashes() throws InterruptedException, KeeperException { + try (SolrZkClient zkClient = + new SolrZkClient.Builder() + .withUrl(zkServer.getZkHost()) + .withTimeout(10000, TimeUnit.MILLISECONDS) + .build()) { + int maxEntries = 10; + String path = getAndMakeInitialPath(zkClient); + + // Add a "legacy" / malformed key + zkClient.makePath(path + "/" + DistributedMap.PREFIX + "slash/test/0", new byte[0], true); + + AtomicInteger overFlowCounter = new AtomicInteger(); + DistributedMap map = + new SizeLimitedDistributedMap( + zkClient, path, maxEntries, (element) -> overFlowCounter.incrementAndGet()); + + // Now add regular keys until we reach the size limit of the map. + // Once we hit the limit, the oldest item (the one we added above with slashes) is deleted, + // but that fails. + for (int i = 1; i <= maxEntries; ++i) { + map.put(String.valueOf(i), new byte[0]); + } + assertTrue(map.size() <= maxEntries); + assertEquals(1, overFlowCounter.get()); + } + } } diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/collections-api.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/collections-api.adoc index 59955461b91..452c384bcc0 100644 --- a/solr/solr-ref-guide/modules/configuration-guide/pages/collections-api.adoc +++ b/solr/solr-ref-guide/modules/configuration-guide/pages/collections-api.adoc @@ -36,6 +36,7 @@ Because this API has a large number of commands and options, we've grouped the c Since some collection API calls can be long running tasks (such as SPLITSHARD), you can optionally have the calls run asynchronously. Specifying `async=` enables you to make an asynchronous call, the status of which can be requested using the <> call at any time. +The ID provided can be any string so long as it doesn't have a `/` in it. As of now, REQUESTSTATUS does not automatically clean up the tracking data structures, meaning the status of completed or failed tasks stays stored in ZooKeeper unless cleared manually. DELETESTATUS can be used to clear the stored statuses. From a103f2ac899f340d7029394707b0adc0c506549b Mon Sep 17 00:00:00 2001 From: David Smiley Date: Thu, 3 Aug 2023 17:13:40 -0400 Subject: [PATCH 2/2] CHANGES.txt --- solr/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 7fe2590a383..c70cdd29125 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -86,6 +86,9 @@ Bug Fixes * SOLR-16905: Allow access to specified "solr.allowPaths" in Security Manager (daylicron, Houston Putman) +* SOLR-16415: asyncId must not have '/'; enforce this. Enhance ZK cleanup to process directories + instead of fail. (David Smiley, Paul McArthur) + Dependency Upgrades --------------------- (No changes)