From edb50463028ab8bb8d6da8855c077e80aeca8cf7 Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Thu, 14 Nov 2019 22:30:04 +0100
Subject: [PATCH] Track Repository Gen. in BlobStoreRepository (#48944)

This is intended as a stop-gap solution/improvement to #38941 that
prevents repo modifications without an intervening master failover from
causing inconsistent (outdated due to inconsistent listing of index-N blobs)
`RepositoryData` to be written. Tracking the latest repository generation
will move to the cluster state in a separate pull request. This is intended
as a low-risk change to be backported as far as possible and motivated by
the recently increased chance of #38941 causing trouble via SLM (see
https://github.com/elastic/elasticsearch/issues/47520).

Closes #47834
Closes #49048
---
 .../blobstore/BlobStoreRepository.java        | 83 ++++++++++++++++---
 .../snapshots/SnapshotResiliencyTests.java    |  4 +-
 .../MockEventuallyConsistentRepository.java   | 42 +++++++---
 ...ckEventuallyConsistentRepositoryTests.java | 12 +--
 4 files changed, 111 insertions(+), 30 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java
index 6a7a094de9886..ea301b80246b2 100644
--- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java
+++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java
@@ -113,6 +113,7 @@
 import java.util.concurrent.Executor;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;

@@ -372,7 +373,7 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, boolea
         } else {
             try {
                 final Map<String, BlobMetaData> rootBlobs = blobContainer().listBlobs();
-                final RepositoryData repositoryData = getRepositoryData(latestGeneration(rootBlobs.keySet()));
+                final RepositoryData repositoryData = safeRepositoryData(repositoryStateId, rootBlobs);
                 // Cache the indices that were found before writing out the new index-N blob so that a stuck master will never
                 // delete an index that was created by another master node after writing this index-N blob.
                 final Map<String, BlobContainer> foundIndices = blobStore().blobContainer(indicesPath()).children();
@@ -383,6 +384,30 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, boolea
         }
     }

+    /**
+     * Loads {@link RepositoryData} ensuring that it is consistent with the given {@code rootBlobs} as well as the assumed generation.
+     *
+     * @param repositoryStateId Expected repository generation
+     * @param rootBlobs         Blobs at the repository root
+     * @return RepositoryData
+     */
+    private RepositoryData safeRepositoryData(long repositoryStateId, Map<String, BlobMetaData> rootBlobs) {
+        final long generation = latestGeneration(rootBlobs.keySet());
+        final long genToLoad = latestKnownRepoGen.updateAndGet(known -> Math.max(known, repositoryStateId));
+        if (genToLoad > generation) {
+            // It's always a possibility to not see the latest index-N in the listing here on an eventually consistent blob store, just
+            // debug log it. Any blobs leaked as a result of an inconsistent listing here will be cleaned up in a subsequent cleanup or
+            // snapshot delete run anyway.
+ logger.debug("Determined repository's generation from its contents to [" + generation + "] but " + + "current generation is at least [" + genToLoad + "]"); + } + if (genToLoad != repositoryStateId) { + throw new RepositoryException(metadata.name(), "concurrent modification of the index-N file, expected current generation [" + + repositoryStateId + "], actual current generation [" + genToLoad + "]"); + } + return getRepositoryData(genToLoad); + } + /** * After updating the {@link RepositoryData} each of the shards directories is individually first moved to the next shard generation * and then has all now unreferenced blobs in it deleted. @@ -610,14 +635,8 @@ public void cleanup(long repositoryStateId, boolean writeShardGens, ActionListen if (isReadOnly()) { throw new RepositoryException(metadata.name(), "cannot run cleanup on readonly repository"); } - final RepositoryData repositoryData = getRepositoryData(); - if (repositoryData.getGenId() != repositoryStateId) { - // Check that we are working on the expected repository version before gathering the data to clean up - throw new RepositoryException(metadata.name(), "concurrent modification of the repository before cleanup started, " + - "expected current generation [" + repositoryStateId + "], actual current generation [" - + repositoryData.getGenId() + "]"); - } Map rootBlobs = blobContainer().listBlobs(); + final RepositoryData repositoryData = safeRepositoryData(repositoryStateId, rootBlobs); final Map foundIndices = blobStore().blobContainer(indicesPath()).children(); final Set survivingIndexIds = repositoryData.getIndices().values().stream().map(IndexId::getId).collect(Collectors.toSet()); @@ -903,12 +922,36 @@ public void endVerification(String seed) { } } + // Tracks the latest known repository generation in a best-effort way to detect inconsistent listing of root level index-N blobs + // and concurrent modifications. + // Protected for use in MockEventuallyConsistentRepository + protected final AtomicLong latestKnownRepoGen = new AtomicLong(RepositoryData.EMPTY_REPO_GEN); + @Override public RepositoryData getRepositoryData() { - try { - return getRepositoryData(latestIndexBlobId()); - } catch (IOException ioe) { - throw new RepositoryException(metadata.name(), "Could not determine repository generation from root blobs", ioe); + // Retry loading RepositoryData in a loop in case we run into concurrent modifications of the repository. 
+        while (true) {
+            final long generation;
+            try {
+                generation = latestIndexBlobId();
+            } catch (IOException ioe) {
+                throw new RepositoryException(metadata.name(), "Could not determine repository generation from root blobs", ioe);
+            }
+            final long genToLoad = latestKnownRepoGen.updateAndGet(known -> Math.max(known, generation));
+            if (genToLoad > generation) {
+                logger.info("Determined repository generation [" + generation
+                    + "] from repository contents but correct generation must be at least [" + genToLoad + "]");
+            }
+            try {
+                return getRepositoryData(genToLoad);
+            } catch (RepositoryException e) {
+                if (genToLoad != latestKnownRepoGen.get()) {
+                    logger.warn("Failed to load repository data generation [" + genToLoad +
+                        "] because a concurrent operation moved the current generation to [" + latestKnownRepoGen.get() + "]", e);
+                    continue;
+                }
+                throw e;
+            }
         }
     }

@@ -926,6 +969,12 @@ private RepositoryData getRepositoryData(long indexGen) {
                 return RepositoryData.snapshotsFromXContent(parser, indexGen);
             }
         } catch (IOException ioe) {
+            // If we fail to load the generation we tracked in latestKnownRepoGen we reset it.
+            // This is done as a fail-safe in case a user manually deletes the contents of the repository in which case subsequent
+            // operations must start from the EMPTY_REPO_GEN again
+            if (latestKnownRepoGen.compareAndSet(indexGen, RepositoryData.EMPTY_REPO_GEN)) {
+                logger.warn("Resetting repository generation tracker because we failed to read generation [" + indexGen + "]", ioe);
+            }
             throw new RepositoryException(metadata.name(), "could not read repository data from index blob", ioe);
         }
     }
@@ -951,11 +1000,21 @@ protected void writeIndexGen(final RepositoryData repositoryData, final long exp
                 "] - possibly due to simultaneous snapshot deletion requests");
         }
         final long newGen = currentGen + 1;
+        if (latestKnownRepoGen.get() >= newGen) {
+            throw new IllegalArgumentException(
+                "Tried writing generation [" + newGen + "] but repository is at least at generation [" + latestKnownRepoGen.get() + "] already");
+        }
         // write the index file
         final String indexBlob = INDEX_FILE_PREFIX + Long.toString(newGen);
         logger.debug("Repository [{}] writing new index generational blob [{}]", metadata.name(), indexBlob);
         writeAtomic(indexBlob,
             BytesReference.bytes(repositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), writeShardGens)), true);
+        final long latestKnownGen = latestKnownRepoGen.updateAndGet(known -> Math.max(known, newGen));
+        if (newGen < latestKnownGen) {
+            // Don't mess up the index.latest blob
+            throw new IllegalStateException(
+                "Wrote generation [" + newGen + "] but latest known repo gen concurrently changed to [" + latestKnownGen + "]");
+        }
         // write the current generation to the index-latest file
         final BytesReference genBytes;
         try (BytesStreamOutput bStream = new BytesStreamOutput()) {
diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
index 3f9f7ec131d5e..c61b0b1e1b872 100644
--- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
+++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
@@ -396,7 +396,7 @@ public void testConcurrentSnapshotCreateAndDelete() {
         final StepListener<CreateSnapshotResponse> createAnotherSnapshotResponseStepListener = new StepListener<>();

         continueOrDie(deleteSnapshotStepListener, acknowledgedResponse -> masterNode.client.admin().cluster()
-            .prepareCreateSnapshot(repoName, snapshotName).execute(createAnotherSnapshotResponseStepListener));
+            .prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(true).execute(createAnotherSnapshotResponseStepListener));

         continueOrDie(createAnotherSnapshotResponseStepListener, createSnapshotResponse ->
             assertEquals(createSnapshotResponse.getSnapshotInfo().state(), SnapshotState.SUCCESS));
@@ -1146,7 +1146,7 @@ protected void assertSnapshotOrGenericThread() {
         } else {
             return metaData -> {
                 final Repository repository = new MockEventuallyConsistentRepository(
-                    metaData, xContentRegistry(), deterministicTaskQueue.getThreadPool(), blobStoreContext);
+                    metaData, xContentRegistry(), deterministicTaskQueue.getThreadPool(), blobStoreContext, random());
                 repository.start();
                 return repository;
             };
diff --git a/server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepository.java b/server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepository.java
index 11f30e0633b05..fab11c82d169e 100644
--- a/server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepository.java
+++ b/server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepository.java
@@ -43,9 +43,11 @@
 import java.io.InputStream;
 import java.nio.file.NoSuchFileException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Function;
@@ -63,18 +65,22 @@
  */
 public class MockEventuallyConsistentRepository extends BlobStoreRepository {

+    private final Random random;
+
     private final Context context;

     private final NamedXContentRegistry namedXContentRegistry;

     public MockEventuallyConsistentRepository(
-        RepositoryMetaData metadata,
-        NamedXContentRegistry namedXContentRegistry,
-        ThreadPool threadPool,
-        Context context) {
-        super(metadata,false, namedXContentRegistry, threadPool);
+        final RepositoryMetaData metadata,
+        final NamedXContentRegistry namedXContentRegistry,
+        final ThreadPool threadPool,
+        final Context context,
+        final Random random) {
+        super(metadata, false, namedXContentRegistry, threadPool);
         this.context = context;
         this.namedXContentRegistry = namedXContentRegistry;
+        this.random = random;
     }

     // Filters out all actions that are superseded by subsequent actions
@@ -111,6 +117,9 @@ public BlobPath basePath() {
      */
     public static final class Context {

+        // Eventual consistency is only simulated as long as this flag is false
+        private boolean consistent;
+
         private final List<BlobStoreAction> actions = new ArrayList<>();

         /**
@@ -121,6 +130,7 @@ public void forceConsistent() {
                 final List<BlobStoreAction> consistentActions = consistentView(actions);
                 actions.clear();
                 actions.addAll(consistentActions);
+                consistent = true;
             }
         }
     }
@@ -244,14 +254,14 @@ public Map<String, BlobMetaData> listBlobs() {
             ensureNotClosed();
             final String thisPath = path.buildAsString();
             synchronized (context.actions) {
-                return consistentView(context.actions).stream()
+                return maybeMissLatestIndexN(consistentView(context.actions).stream()
                     .filter(
                         action -> action.path.startsWith(thisPath) && action.path.substring(thisPath.length()).indexOf('/') == -1
                             && action.operation == Operation.PUT)
                     .collect(
                         Collectors.toMap(
                             action -> action.path.substring(thisPath.length()),
-                            action -> new PlainBlobMetaData(action.path.substring(thisPath.length()), action.data.length)));
+                            action -> new PlainBlobMetaData(action.path.substring(thisPath.length()), action.data.length))));
             }
         }

@@ -272,9 +282,21 @@ public Map<String, BlobContainer> children() {

         @Override
         public Map<String, BlobMetaData> listBlobsByPrefix(String blobNamePrefix) {
-            return
-                listBlobs().entrySet().stream().filter(entry -> entry.getKey().startsWith(blobNamePrefix)).collect(
-                    Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+            return maybeMissLatestIndexN(
+                listBlobs().entrySet().stream().filter(entry -> entry.getKey().startsWith(blobNamePrefix))
+                    .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
+        }
+
+        // Randomly filter out the latest /index-N blob from a listing to test that tracking of it in latestKnownRepoGen
+        // overrides an inconsistent listing
+        private Map<String, BlobMetaData> maybeMissLatestIndexN(Map<String, BlobMetaData> listing) {
+            // Only filter out latest index-N at the repo root and only as long as we're not in a forced consistent state
+            if (path.parent() == null && context.consistent == false && random.nextBoolean()) {
+                final Map<String, BlobMetaData> filtered = new HashMap<>(listing);
+                filtered.remove(BlobStoreRepository.INDEX_FILE_PREFIX + latestKnownRepoGen.get());
+                return Collections.unmodifiableMap(filtered);
+            }
+            return listing;
         }

         @Override
diff --git a/server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepositoryTests.java b/server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepositoryTests.java
index b5756c89377ea..5fdfbe9a93a4d 100644
--- a/server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepositoryTests.java
+++ b/server/src/test/java/org/elasticsearch/snapshots/mockstore/MockEventuallyConsistentRepositoryTests.java
@@ -50,7 +50,7 @@ public void testReadAfterWriteConsistently() throws IOException {
         MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
         try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
             new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
-            xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
+            xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
             repository.start();
             final BlobContainer blobContainer = repository.blobStore().blobContainer(repository.basePath());
             final String blobName = randomAlphaOfLength(10);
@@ -70,7 +70,7 @@ public void testReadAfterWriteAfterReadThrows() throws IOException {
         MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
         try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
             new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
-            xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
+            xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
             repository.start();
             final BlobContainer blobContainer = repository.blobStore().blobContainer(repository.basePath());
             final String blobName = randomAlphaOfLength(10);
@@ -86,7 +86,7 @@ public void testReadAfterDeleteAfterWriteThrows() throws IOException {
         MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
         try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
             new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
-            xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
+            xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
             repository.start();
             final BlobContainer blobContainer = repository.blobStore().blobContainer(repository.basePath());
             final String blobName = randomAlphaOfLength(10);
@@ -104,7 +104,7 @@ public void testOverwriteRandomBlobFails() throws IOException {
         MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
         try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
             new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
-            xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
+            xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
             repository.start();
             final BlobContainer container = repository.blobStore().blobContainer(repository.basePath());
             final String blobName = randomAlphaOfLength(10);
@@ -121,7 +121,7 @@ public void testOverwriteShardSnapBlobFails() throws IOException {
         MockEventuallyConsistentRepository.Context blobStoreContext = new MockEventuallyConsistentRepository.Context();
         try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
             new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
-            xContentRegistry(), mock(ThreadPool.class), blobStoreContext)) {
+            xContentRegistry(), mock(ThreadPool.class), blobStoreContext, random())) {
             repository.start();
             final BlobContainer container =
                 repository.blobStore().blobContainer(repository.basePath().add("indices").add("someindex").add("0"));
@@ -143,7 +143,7 @@ public void testOverwriteSnapshotInfoBlob() {
             new ThreadPool.Info(ThreadPool.Names.SNAPSHOT, ThreadPool.ThreadPoolType.FIXED, randomIntBetween(1, 10)));
         try (BlobStoreRepository repository = new MockEventuallyConsistentRepository(
             new RepositoryMetaData("testRepo", "mockEventuallyConsistent", Settings.EMPTY),
-            xContentRegistry(), threadPool, blobStoreContext)) {
+            xContentRegistry(), threadPool, blobStoreContext, random())) {
             repository.start();

             // We create a snap- blob for snapshot "foo" in the first generation
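
Below, outside the patch itself, is a minimal self-contained sketch of the generation-tracking pattern the commit introduces in BlobStoreRepository: remember the highest repository generation ever observed in an AtomicLong, never trust a blob listing that reports an older generation, and retry a load that fails because a concurrent writer moved the generation forward. The class, method names, and hard-coded generations in the sketch (GenerationTrackerSketch, loadGeneration, 41/42) are hypothetical stand-ins used only for illustration; the real logic lives in the diff above.

import java.util.concurrent.atomic.AtomicLong;

public class GenerationTrackerSketch {

    // Highest repository generation this node has ever observed; it never moves backwards.
    private final AtomicLong latestKnownGen = new AtomicLong(-1L); // -1 plays the role of RepositoryData.EMPTY_REPO_GEN

    // Stand-in for listing index-N blobs on an eventually consistent store; may return a stale generation.
    private long listLatestGenerationFromBlobs() {
        return 41L;
    }

    // Stand-in for reading index-N; fails when the requested generation does not exist (any longer).
    private String loadGeneration(long gen) {
        if (gen != 42L) {
            throw new IllegalStateException("index-" + gen + " not found");
        }
        return "repository data at generation " + gen;
    }

    public String load() {
        while (true) {
            final long listed = listLatestGenerationFromBlobs();
            // Take the max of the listing and what we already know, so a stale listing cannot win.
            final long genToLoad = latestKnownGen.updateAndGet(known -> Math.max(known, listed));
            try {
                return loadGeneration(genToLoad);
            } catch (IllegalStateException e) {
                // If a concurrent writer bumped the generation while we were loading, retry with the newer one.
                if (genToLoad != latestKnownGen.get()) {
                    continue;
                }
                throw e;
            }
        }
    }

    public static void main(String[] args) {
        final GenerationTrackerSketch tracker = new GenerationTrackerSketch();
        tracker.latestKnownGen.set(42L);    // pretend this node wrote index-42 earlier
        System.out.println(tracker.load()); // succeeds although the listing only reported index-41
    }
}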