Skip to content

Commit

Permalink
ILM: allow check-migration step to continue if tier setting unset (el…
Browse files Browse the repository at this point in the history
…astic#62636)

This allows the `check-migration` step to move past the allocation check
if the tier routing settings are manually unset.

This helps a user unblock ILM in case a tier is removed (ie. if the warm tier
is decommissioned this will allow users to resume the ILM policies stuck in
`check-migration` waiting for the warm nodes to become available and the managed
index to allocate. this allows the index to allocate on the other available tiers)
  • Loading branch information
andreidan authored Sep 21, 2020
1 parent e48eab9 commit d7a1eaa
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
Expand All @@ -26,8 +27,9 @@
import java.util.Set;

import static org.elasticsearch.cluster.node.DiscoveryNodeRole.DATA_ROLE;
import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_INCLUDE_SETTING;
import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_PREFER_SETTING;
import static org.elasticsearch.xpack.core.ilm.AllocationRoutedStep.getPendingAllocations;
import static org.elasticsearch.xpack.core.ilm.step.info.AllocationInfo.waitingForActiveShardsAllocationInfo;

/**
* Checks whether all shards have been correctly routed in response to updating the allocation rules for an index in order
Expand Down Expand Up @@ -71,11 +73,23 @@ public Result isConditionMet(Index index, ClusterState clusterState) {
logger.debug("[{}] lifecycle action for index [{}] executed but index no longer exists", getKey().getAction(), index.getName());
return new Result(false, null);
}
String destinationTier = INDEX_ROUTING_INCLUDE_SETTING.get(idxMeta.getSettings());
String destinationTier = INDEX_ROUTING_PREFER_SETTING.get(idxMeta.getSettings());
if (ActiveShardCount.ALL.enoughShardsActive(clusterState, index.getName()) == false) {
logger.debug("[{}] migration of index [{}] to the [{}] tier cannot progress, as not all shards are active",
if (Strings.isEmpty(destinationTier)) {
logger.debug("[{}] lifecycle action for index [{}] cannot make progress because not all shards are active",
getKey().getAction(), index.getName());
} else {
logger.debug("[{}] migration of index [{}] to the [{}] tier cannot progress, as not all shards are active",
getKey().getAction(), index.getName(), destinationTier);
return new Result(false, AllocationInfo.waitingForActiveShardsAllocationInfo(idxMeta.getNumberOfReplicas()));
}
return new Result(false, waitingForActiveShardsAllocationInfo(idxMeta.getNumberOfReplicas()));
}

if (Strings.isEmpty(destinationTier)) {
logger.debug("index [{}] has no data tier routing setting configured and all its shards are active. considering the [{}] " +
"step condition met and continuing to the next step", index.getName(), getKey().getName());
// the user removed the tier routing setting and all the shards are active so we'll cary on
return new Result(true, null);
}

int allocationPendingAllShards = getPendingAllocations(index, ALLOCATION_DECIDERS, clusterState);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import java.util.Set;

import static java.util.Collections.emptyMap;
import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_INCLUDE_SETTING;
import static org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider.INDEX_ROUTING_PREFER;
import static org.elasticsearch.xpack.core.ilm.step.info.AllocationInfo.waitingForActiveShardsAllocationInfo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
Expand Down Expand Up @@ -95,7 +95,7 @@ public void testExecuteWithUnassignedShard() {

public void testExecuteWithPendingShards() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_INCLUDE_SETTING.getKey(), DataTier.DATA_WARM))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_PREFER, DataTier.DATA_WARM))
.numberOfShards(1).numberOfReplicas(0).build();
Index index = indexMetadata.getIndex();
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
Expand All @@ -122,7 +122,7 @@ public void testExecuteWithPendingShards() {

public void testExecuteWithPendingShardsAndTargetRoleNotPresentInCluster() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_INCLUDE_SETTING.getKey(), DataTier.DATA_WARM))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_PREFER, DataTier.DATA_WARM))
.numberOfShards(1).numberOfReplicas(0).build();
Index index = indexMetadata.getIndex();
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
Expand Down Expand Up @@ -159,7 +159,7 @@ public void testExecuteIndexMissing() {

public void testExecuteIsComplete() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_INCLUDE_SETTING.getKey(), DataTier.DATA_WARM))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_PREFER, DataTier.DATA_WARM))
.numberOfShards(1).numberOfReplicas(0).build();
Index index = indexMetadata.getIndex();
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
Expand All @@ -181,7 +181,7 @@ public void testExecuteIsComplete() {

public void testExecuteWithGenericDataNodes() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_INCLUDE_SETTING.getKey(), DataTier.DATA_WARM))
.settings(settings(Version.CURRENT).put(INDEX_ROUTING_PREFER, DataTier.DATA_WARM))
.numberOfShards(1).numberOfReplicas(0).build();
Index index = indexMetadata.getIndex();
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
Expand All @@ -200,6 +200,52 @@ public void testExecuteWithGenericDataNodes() {
assertThat(result.getInfomationContext(), is(nullValue()));
}

public void testExecuteForIndexWithoutTierRoutingInformationWaitsForReplicasToBeActive() {
IndexMetadata indexMetadata = IndexMetadata.builder(randomAlphaOfLengthBetween(5, 10))
.settings(settings(Version.CURRENT))
.numberOfShards(1).numberOfReplicas(1).build();
Index index = indexMetadata.getIndex();
{
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
.addShard(TestShardRouting.newShardRouting(new ShardId(index, 0), "node1", true, ShardRoutingState.STARTED))
.addReplica();

ClusterState clusterState =
ClusterState.builder(ClusterState.EMPTY_STATE).metadata(Metadata.builder().put(indexMetadata, true).build())
.nodes(DiscoveryNodes.builder()
.add(newNode("node1", Collections.singleton(DataTier.DATA_HOT_NODE_ROLE)))
)
.routingTable(RoutingTable.builder().add(indexRoutingTable).build())
.build();
DataTierMigrationRoutedStep step = createRandomInstance();
Result expectedResult = new Result(false, waitingForActiveShardsAllocationInfo(1));

Result result = step.isConditionMet(index, clusterState);
assertThat(result.isComplete(), is(false));
assertThat(result.getInfomationContext(), is(expectedResult.getInfomationContext()));
}

{
IndexRoutingTable.Builder indexRoutingTable = IndexRoutingTable.builder(index)
.addShard(TestShardRouting.newShardRouting(new ShardId(index, 0), "node1", true, ShardRoutingState.STARTED))
.addShard(TestShardRouting.newShardRouting(new ShardId(index, 0), "node2", false, ShardRoutingState.STARTED));

ClusterState clusterState =
ClusterState.builder(ClusterState.EMPTY_STATE).metadata(Metadata.builder().put(indexMetadata, true).build())
.nodes(DiscoveryNodes.builder()
.add(newNode("node1", Collections.singleton(DataTier.DATA_HOT_NODE_ROLE)))
.add(newNode("node2", Collections.singleton(DataTier.DATA_WARM_NODE_ROLE)))
)
.routingTable(RoutingTable.builder().add(indexRoutingTable).build())
.build();
DataTierMigrationRoutedStep step = createRandomInstance();

Result result = step.isConditionMet(index, clusterState);
assertThat(result.isComplete(), is(true));
assertThat(result.getInfomationContext(), is(nullValue()));
}
}

private DiscoveryNode newNode(String nodeId, Set<DiscoveryNodeRole> roles) {
return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), emptyMap(), roles, Version.CURRENT);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,16 @@

package org.elasticsearch.xpack.ilm;

import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest;
import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainResponse;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xpack.cluster.routing.allocation.DataTierAllocationDecider;
import org.elasticsearch.xpack.core.DataTier;
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
import org.elasticsearch.xpack.core.XPackSettings;
Expand All @@ -30,6 +35,7 @@
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
Expand Down Expand Up @@ -138,4 +144,89 @@ public void testIndexDataTierMigration() throws Exception {
assertThat(indexLifecycleExplainResponse.getStep(), is("complete"));
});
}

public void testUserOptsOutOfTierMigration() throws Exception {
internalCluster().startMasterOnlyNodes(1, Settings.EMPTY);
logger.info("starting hot data node");
internalCluster().startNode(hotNode(Settings.EMPTY));

Phase hotPhase = new Phase("hot", TimeValue.ZERO, Collections.emptyMap());
Phase warmPhase = new Phase("warm", TimeValue.ZERO, Collections.emptyMap());
Phase coldPhase = new Phase("cold", TimeValue.ZERO, Collections.emptyMap());
LifecyclePolicy lifecyclePolicy = new LifecyclePolicy(policy, Map.of("hot", hotPhase, "warm", warmPhase, "cold", coldPhase));
PutLifecycleAction.Request putLifecycleRequest = new PutLifecycleAction.Request(lifecyclePolicy);
PutLifecycleAction.Response putLifecycleResponse = client().execute(PutLifecycleAction.INSTANCE, putLifecycleRequest).get();
assertAcked(putLifecycleResponse);

Settings settings = Settings.builder().put(indexSettings()).put(SETTING_NUMBER_OF_SHARDS, 1)
.put(SETTING_NUMBER_OF_REPLICAS, 1).put(LifecycleSettings.LIFECYCLE_NAME, policy).build();
CreateIndexResponse res = client().admin().indices().prepareCreate(managedIndex).setSettings(settings).get();
assertTrue(res.isAcknowledged());

assertBusy(() -> {
ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex);
ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE,
explainRequest).get();

IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex);
assertThat(indexLifecycleExplainResponse.getPhase(), is("warm"));
assertThat(indexLifecycleExplainResponse.getStep(), is(DataTierMigrationRoutedStep.NAME));
});

Settings removeTierRoutingSetting = Settings.builder().putNull(DataTierAllocationDecider.INDEX_ROUTING_PREFER).build();
UpdateSettingsRequest updateSettingsRequest = new UpdateSettingsRequest(managedIndex).settings(removeTierRoutingSetting);
assertAcked(client().admin().indices().updateSettings(updateSettingsRequest).actionGet());

assertBusy(() -> {
ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex);
ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE,
explainRequest).get();

IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex);
assertThat(indexLifecycleExplainResponse.getPhase(), is("warm"));
assertThat(indexLifecycleExplainResponse.getStep(), is(DataTierMigrationRoutedStep.NAME));
assertReplicaIsUnassigned();
}, 30, TimeUnit.SECONDS);

internalCluster().startNode(coldNode(Settings.EMPTY));

// the index should successfully allocate
ensureGreen(managedIndex);

// the index is successfully allocated but the migrate action from the cold phase re-configured the tier migration setting to the
// cold tier so ILM is stuck in `check-migration` in the cold phase this time
// we have 2 options to resume the ILM execution:
// 1. start another cold node so both the primary and replica can relocate to the cold nodes
// 2. remove the tier routing setting from the index again (we're doing this below)
assertBusy(() -> {
ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex);
ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE,
explainRequest).get();

IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex);
assertThat(indexLifecycleExplainResponse.getPhase(), is("cold"));
assertThat(indexLifecycleExplainResponse.getStep(), is(DataTierMigrationRoutedStep.NAME));
});

// remove the tier routing setting again
assertAcked(client().admin().indices().updateSettings(updateSettingsRequest).actionGet());

// wait for lifecycle to complete in the cold phase
assertBusy(() -> {
ExplainLifecycleRequest explainRequest = new ExplainLifecycleRequest().indices(managedIndex);
ExplainLifecycleResponse explainResponse = client().execute(ExplainLifecycleAction.INSTANCE,
explainRequest).get();

IndexLifecycleExplainResponse indexLifecycleExplainResponse = explainResponse.getIndexResponses().get(managedIndex);
assertThat(indexLifecycleExplainResponse.getPhase(), is("cold"));
assertThat(indexLifecycleExplainResponse.getStep(), is("complete"));
}, 30, TimeUnit.SECONDS);
}

private void assertReplicaIsUnassigned() {
ClusterAllocationExplainRequest explainReplicaShard =
new ClusterAllocationExplainRequest().setIndex(managedIndex).setPrimary(false).setShard(0);
ClusterAllocationExplainResponse response = client().admin().cluster().allocationExplain(explainReplicaShard).actionGet();
assertThat(response.getExplanation().getShardState(), is(ShardRoutingState.UNASSIGNED));
}
}

0 comments on commit d7a1eaa

Please sign in to comment.