Skip to content

Commit

Permalink
Allow ILM to stop if indices have nonexistent policies (#40820)
Browse files Browse the repository at this point in the history
Prior to this PR, there is a bug in ILM which does not allow ILM to stop
if one or more indices have an index.lifecycle.name which refers to
a policy that does not exist - the operation_mode will be stuck as
STOPPING until either the policy is created or the nonexistent
policy is removed from those indices.

This change allows ILM to stop in this case and makes the logging more
clear as to why ILM is not stopping.
  • Loading branch information
gwbrown authored Apr 4, 2019
1 parent 65d2518 commit 5347dec
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,28 @@ public void testMoveToInjectedStep() throws Exception {
});
}

public void testCanStopILMWithPolicyUsingNonexistentPolicy() throws Exception {
createIndexWithSettings(index, Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put(LifecycleSettings.LIFECYCLE_NAME_SETTING.getKey(), randomAlphaOfLengthBetween(5,15)));

Request stopILMRequest = new Request("POST", "_ilm/stop");
assertOK(client().performRequest(stopILMRequest));

Request statusRequest = new Request("GET", "_ilm/status");
assertBusy(() -> {
Response statusResponse = client().performRequest(statusRequest);
assertOK(statusResponse);
Map<String, Object> statusResponseMap = entityAsMap(statusResponse);
String status = (String) statusResponseMap.get("operation_mode");
assertEquals("STOPPED", status);
});

// Re-start ILM so that subsequent tests don't fail
Request startILMReqest = new Request("POST", "_ilm/start");
assertOK(client().performRequest(startILMReqest));
}

private void createFullPolicy(TimeValue hotTime) throws IOException {
Map<String, LifecycleAction> hotActions = new HashMap<>();
hotActions.put(SetPriorityAction.NAME, new SetPriorityAction(100));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
* A service which runs the {@link LifecyclePolicy}s associated with indexes.
*/
public class IndexLifecycleService
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
implements ClusterStateListener, ClusterStateApplier, SchedulerEngine.Listener, Closeable, LocalNodeMasterListener {
private static final Logger logger = LogManager.getLogger(IndexLifecycleService.class);
private static final Set<String> IGNORE_ACTIONS_MAINTENANCE_REQUESTED = Collections.singleton(ShrinkAction.NAME);
private volatile boolean isMaster = false;
Expand Down Expand Up @@ -111,18 +111,26 @@ public void onMaster() {
IndexMetaData idxMeta = cursor.value;
String policyName = LifecycleSettings.LIFECYCLE_NAME_SETTING.get(idxMeta.getSettings());
if (Strings.isNullOrEmpty(policyName) == false) {
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(LifecycleExecutionState.fromIndexMetadata(idxMeta));
if (OperationMode.STOPPING == currentMode &&
stepKey != null &&
IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction()) == false) {
logger.info("skipping policy [{}] for index [{}]. stopping Index Lifecycle execution",
policyName, idxMeta.getIndex().getName());
continue;
final LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(idxMeta);
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);

if (OperationMode.STOPPING == currentMode) {
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
safeToStop = false;
} else {
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
idxMeta.getIndex().getName(), policyName);
}
} else {
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
}
lifecycleRunner.maybeRunAsyncAction(clusterState, idxMeta, policyName, stepKey);
safeToStop = false; // proven false!
}
}

if (safeToStop && OperationMode.STOPPING == currentMode) {
submitOperationModeUpdate(OperationMode.STOPPED);
}
Expand Down Expand Up @@ -184,7 +192,7 @@ public void clusterChanged(ClusterChangedEvent event) {
@Override
public void applyClusterState(ClusterChangedEvent event) {
if (event.localNodeMaster()) { // only act if we are master, otherwise
// keep idle until elected
// keep idle until elected
if (event.state().metaData().custom(IndexLifecycleMetadata.TYPE) != null) {
policyRegistry.update(event.state());
}
Expand Down Expand Up @@ -237,21 +245,34 @@ void triggerPolicies(ClusterState clusterState, boolean fromClusterStateChange)
IndexMetaData idxMeta = cursor.value;
String policyName = LifecycleSettings.LIFECYCLE_NAME_SETTING.get(idxMeta.getSettings());
if (Strings.isNullOrEmpty(policyName) == false) {
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(LifecycleExecutionState.fromIndexMetadata(idxMeta));
if (OperationMode.STOPPING == currentMode && stepKey != null
&& IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction()) == false) {
logger.info("skipping policy [" + policyName + "] for index [" + idxMeta.getIndex().getName()
+ "]. stopping Index Lifecycle execution");
continue;
}
if (fromClusterStateChange) {
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
final LifecycleExecutionState lifecycleState = LifecycleExecutionState.fromIndexMetadata(idxMeta);
StepKey stepKey = IndexLifecycleRunner.getCurrentStepKey(lifecycleState);

if (OperationMode.STOPPING == currentMode) {
if (stepKey != null && IGNORE_ACTIONS_MAINTENANCE_REQUESTED.contains(stepKey.getAction())) {
logger.info("waiting to stop ILM because index [{}] with policy [{}] is currently in action [{}]",
idxMeta.getIndex().getName(), policyName, stepKey.getAction());
if (fromClusterStateChange) {
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
} else {
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
}
// ILM is trying to stop, but this index is in a Shrink action (or other dangerous action) so we can't stop
safeToStop = false;
} else {
logger.info("skipping policy execution for index [{}] with policy [{}] because ILM is stopping",
idxMeta.getIndex().getName(), policyName);
}
} else {
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
if (fromClusterStateChange) {
lifecycleRunner.runPolicyAfterStateChange(policyName, idxMeta);
} else {
lifecycleRunner.runPeriodicStep(policyName, idxMeta);
}
}
safeToStop = false; // proven false!
}
}

if (safeToStop && OperationMode.STOPPING == currentMode) {
submitOperationModeUpdate(OperationMode.STOPPED);
}
Expand Down

0 comments on commit 5347dec

Please sign in to comment.