From a616cfa0eb0d6368c8b762013690c63b87fb3a81 Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Thu, 22 Aug 2019 11:48:17 -0600 Subject: [PATCH] Skip SLM retention if ILM is STOPPING or STOPPED This adds a check to ensure we take no action during SLM retention if ILM is currently stopped or in the process of stopping. Relates to #43663 --- .../xpack/slm/SnapshotLifecycleService.java | 2 +- .../xpack/slm/SnapshotRetentionTask.java | 8 +++- .../xpack/slm/SnapshotRetentionTaskTests.java | 44 ++++++++++++++++++- 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotLifecycleService.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotLifecycleService.java index d2435324ea5d6..0d27584d83eb8 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotLifecycleService.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotLifecycleService.java @@ -104,7 +104,7 @@ SchedulerEngine getScheduler() { /** * Returns true if ILM is in the stopped or stopped state */ - private static boolean ilmStoppedOrStopping(ClusterState state) { + static boolean ilmStoppedOrStopping(ClusterState state) { return Optional.ofNullable((SnapshotLifecycleMetadata) state.metaData().custom(SnapshotLifecycleMetadata.TYPE)) .map(SnapshotLifecycleMetadata::getOperationMode) .map(mode -> OperationMode.STOPPING == mode || OperationMode.STOPPED == mode) diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotRetentionTask.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotRetentionTask.java index 10423207fc338..780eecb35db33 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotRetentionTask.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/slm/SnapshotRetentionTask.java @@ -83,6 +83,13 @@ public SnapshotRetentionTask(Client client, ClusterService clusterService, LongS public void triggered(SchedulerEngine.Event event) { assert event.getJobName().equals(SnapshotRetentionService.SLM_RETENTION_JOB_ID) : "expected id to be " + SnapshotRetentionService.SLM_RETENTION_JOB_ID + " but it was " + event.getJobName(); + + final ClusterState state = clusterService.state(); + if (SnapshotLifecycleService.ilmStoppedOrStopping(state)) { + logger.debug("skipping SLM retention as ILM is currently stopped or stopping"); + return; + } + if (running.compareAndSet(false, true)) { final SnapshotLifecycleStats slmStats = new SnapshotLifecycleStats(); @@ -98,7 +105,6 @@ public void triggered(SchedulerEngine.Event event) { }; try { - final ClusterState state = clusterService.state(); final TimeValue maxDeletionTime = LifecycleSettings.SLM_RETENTION_DURATION_SETTING.get(state.metaData().settings()); logger.info("starting SLM retention snapshot cleanup task"); diff --git a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotRetentionTaskTests.java b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotRetentionTaskTests.java index 423882f31f112..ce4ea03dd94c5 100644 --- a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotRetentionTaskTests.java +++ b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotRetentionTaskTests.java @@ -315,10 +315,52 @@ private void timeBoundedDeletion(final boolean deletionSuccess) throws Exception threadPool.shutdownNow(); threadPool.awaitTermination(10, TimeUnit.SECONDS); } + } + + public void testSkipWhileStopping() throws Exception { + doTestSkipDuringMode(OperationMode.STOPPING); + } + + public void testSkipWhileStopped() throws Exception { + doTestSkipDuringMode(OperationMode.STOPPED); + } + + private void doTestSkipDuringMode(OperationMode mode) throws Exception { + try (ThreadPool threadPool = new TestThreadPool("slm-test"); + ClusterService clusterService = ClusterServiceUtils.createClusterService(threadPool); + Client noOpClient = new NoOpClient("slm-test")) { + final String policyId = "policy"; + final String repoId = "repo"; + SnapshotLifecyclePolicy policy = new SnapshotLifecyclePolicy(policyId, "snap", "1 * * * * ?", + repoId, null, new SnapshotRetentionConfiguration(TimeValue.timeValueDays(30), null, null)); + + ClusterState state = createState(mode, policy); + ClusterServiceUtils.setState(clusterService, state); + + SnapshotRetentionTask task = new MockSnapshotRetentionTask(noOpClient, clusterService, + new SnapshotLifecycleTaskTests.VerifyingHistoryStore(noOpClient, ZoneOffset.UTC, + (historyItem) -> fail("should never write history")), + threadPool, + () -> { + fail("should not retrieve snapshots"); + return null; + }, + (a, b, c, d, e) -> fail("should not delete snapshots"), + System::nanoTime); + long time = System.currentTimeMillis(); + task.triggered(new SchedulerEngine.Event(SnapshotRetentionService.SLM_RETENTION_JOB_ID, time, time)); + + threadPool.shutdownNow(); + threadPool.awaitTermination(10, TimeUnit.SECONDS); + } } public ClusterState createState(SnapshotLifecyclePolicy... policies) { + return createState(OperationMode.RUNNING, policies); + } + + public ClusterState createState(OperationMode mode, SnapshotLifecyclePolicy... policies) { Map policyMetadataMap = Arrays.stream(policies) .map(policy -> SnapshotLifecyclePolicyMetadata.builder() .setPolicy(policy) @@ -330,7 +372,7 @@ public ClusterState createState(SnapshotLifecyclePolicy... policies) { MetaData metaData = MetaData.builder() .putCustom(SnapshotLifecycleMetadata.TYPE, - new SnapshotLifecycleMetadata(policyMetadataMap, OperationMode.RUNNING, new SnapshotLifecycleStats())) + new SnapshotLifecycleMetadata(policyMetadataMap, mode, new SnapshotLifecycleStats())) .build(); return ClusterState.builder(new ClusterName("cluster")) .metaData(metaData)