Skip to content

Commit

Permalink
Skip SLM retention if ILM is STOPPING or STOPPED (#45869)
Browse files Browse the repository at this point in the history
This adds a check to ensure we take no action during SLM retention if
ILM is currently stopped or in the process of stopping.

Relates to #43663
  • Loading branch information
dakrone authored Aug 22, 2019
1 parent be684f8 commit d534689
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ SchedulerEngine getScheduler() {
/**
* Returns true if ILM is in the stopping or stopped state
*/
private static boolean ilmStoppedOrStopping(ClusterState state) {
static boolean ilmStoppedOrStopping(ClusterState state) {
return Optional.ofNullable((SnapshotLifecycleMetadata) state.metaData().custom(SnapshotLifecycleMetadata.TYPE))
.map(SnapshotLifecycleMetadata::getOperationMode)
.map(mode -> OperationMode.STOPPING == mode || OperationMode.STOPPED == mode)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ public SnapshotRetentionTask(Client client, ClusterService clusterService, LongS
public void triggered(SchedulerEngine.Event event) {
assert event.getJobName().equals(SnapshotRetentionService.SLM_RETENTION_JOB_ID) :
"expected id to be " + SnapshotRetentionService.SLM_RETENTION_JOB_ID + " but it was " + event.getJobName();

final ClusterState state = clusterService.state();
if (SnapshotLifecycleService.ilmStoppedOrStopping(state)) {
logger.debug("skipping SLM retention as ILM is currently stopped or stopping");
return;
}

if (running.compareAndSet(false, true)) {
final SnapshotLifecycleStats slmStats = new SnapshotLifecycleStats();

Expand All @@ -98,7 +105,6 @@ public void triggered(SchedulerEngine.Event event) {
};

try {
final ClusterState state = clusterService.state();
final TimeValue maxDeletionTime = LifecycleSettings.SLM_RETENTION_DURATION_SETTING.get(state.metaData().settings());

logger.info("starting SLM retention snapshot cleanup task");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -315,10 +315,52 @@ private void timeBoundedDeletion(final boolean deletionSuccess) throws Exception
threadPool.shutdownNow();
threadPool.awaitTermination(10, TimeUnit.SECONDS);
}
}

/**
 * Runs the shared skip scenario from {@code doTestSkipDuringMode} with the
 * {@code STOPPING} operation mode.
 */
public void testSkipWhileStopping() throws Exception {
    final OperationMode stoppingMode = OperationMode.STOPPING;
    doTestSkipDuringMode(stoppingMode);
}

/**
 * Runs the shared skip scenario from {@code doTestSkipDuringMode} with the
 * {@code STOPPED} operation mode.
 */
public void testSkipWhileStopped() throws Exception {
    final OperationMode stoppedMode = OperationMode.STOPPED;
    doTestSkipDuringMode(stoppedMode);
}

/**
 * Publishes a cluster state carrying the given operation mode, then triggers the
 * retention task once. Every callback handed to the task (history writer, snapshot
 * retriever, snapshot deleter) calls {@code fail(...)}, so the test only passes if
 * the task invokes none of them.
 *
 * @param mode operation mode placed into the cluster state built by {@code createState}
 * @throws Exception if setup, the trigger call, or resource cleanup fails
 */
private void doTestSkipDuringMode(OperationMode mode) throws Exception {
    try (ThreadPool pool = new TestThreadPool("slm-test");
         ClusterService service = ClusterServiceUtils.createClusterService(pool);
         Client client = new NoOpClient("slm-test")) {

        // A single policy with a 30-day retention window.
        final SnapshotRetentionConfiguration retention =
            new SnapshotRetentionConfiguration(TimeValue.timeValueDays(30), null, null);
        final SnapshotLifecyclePolicy policy =
            new SnapshotLifecyclePolicy("policy", "snap", "1 * * * * ?", "repo", null, retention);

        // Install the state with the requested mode before the task is created.
        ClusterServiceUtils.setState(service, createState(mode, policy));

        final SnapshotLifecycleTaskTests.VerifyingHistoryStore historyStore =
            new SnapshotLifecycleTaskTests.VerifyingHistoryStore(client, ZoneOffset.UTC,
                (historyItem) -> fail("should never write history"));

        final SnapshotRetentionTask retentionTask = new MockSnapshotRetentionTask(client, service,
            historyStore,
            pool,
            () -> {
                fail("should not retrieve snapshots");
                return null;
            },
            (a, b, c, d, e) -> fail("should not delete snapshots"),
            System::nanoTime);

        final long now = System.currentTimeMillis();
        final SchedulerEngine.Event trigger =
            new SchedulerEngine.Event(SnapshotRetentionService.SLM_RETENTION_JOB_ID, now, now);
        retentionTask.triggered(trigger);

        pool.shutdownNow();
        pool.awaitTermination(10, TimeUnit.SECONDS);
    }
}

/**
 * Convenience overload that delegates to {@code createState(OperationMode, ...)}
 * with {@code OperationMode.RUNNING}.
 *
 * @param policies policies forwarded unchanged to the two-argument overload
 * @return whatever the two-argument overload returns for the RUNNING mode
 */
public ClusterState createState(SnapshotLifecyclePolicy... policies) {
    final OperationMode defaultMode = OperationMode.RUNNING;
    return createState(defaultMode, policies);
}

public ClusterState createState(OperationMode mode, SnapshotLifecyclePolicy... policies) {
Map<String, SnapshotLifecyclePolicyMetadata> policyMetadataMap = Arrays.stream(policies)
.map(policy -> SnapshotLifecyclePolicyMetadata.builder()
.setPolicy(policy)
Expand All @@ -330,7 +372,7 @@ public ClusterState createState(SnapshotLifecyclePolicy... policies) {

MetaData metaData = MetaData.builder()
.putCustom(SnapshotLifecycleMetadata.TYPE,
new SnapshotLifecycleMetadata(policyMetadataMap, OperationMode.RUNNING, new SnapshotLifecycleStats()))
new SnapshotLifecycleMetadata(policyMetadataMap, mode, new SnapshotLifecycleStats()))
.build();
return ClusterState.builder(new ClusterName("cluster"))
.metaData(metaData)
Expand Down

0 comments on commit d534689

Please sign in to comment.