From 394b280b3466b0345927292163610b9029ba49d9 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 19 May 2016 16:16:55 +0200 Subject: [PATCH 01/12] Limit retries of failed allocations per index Today if a shard fails during initialization phase due to misconfiguration, broken disks, missing analyzers, not installed plugins etc. elasticsaerch keeps on trying to initialize or rather allocate that shard. Yet, in the worst case scenario this ends in an endless allocation loop. To prevent this loop and all it's sideeffects like spamming log files over and over again this commit adds an allocation decider that stops allocating a shard that failed more than N times in a row to allocate. The number or retries can be configured via `index.allocation.max_retry` and it's default is set to `5`. Once the setting is updated shards with less failures than the number set per index will be allowed to allocate again. Internally we maintain a counter on the UnassignedInfo that is reset to `0` once the shards has been started. Relates to #18417 --- .../elasticsearch/cluster/ClusterModule.java | 2 + .../cluster/routing/UnassignedInfo.java | 32 +++- .../routing/allocation/AllocationService.java | 10 +- ...AllocateEmptyPrimaryAllocationCommand.java | 2 +- .../decider/MaxRetryAllocationDecider.java | 71 +++++++++ .../common/settings/IndexScopedSettings.java | 3 +- .../gateway/ReplicaShardAllocator.java | 2 +- .../cluster/routing/UnassignedInfoTests.java | 16 +- .../MaxRetryAllocationDeciderTests.java | 139 ++++++++++++++++++ .../SharedClusterSnapshotRestoreIT.java | 2 +- 10 files changed, 262 insertions(+), 17 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java create mode 100644 core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java diff --git a/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 47dd2ce9ae6c3..a02e399ac0c4c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -49,6 +49,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SameShardAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; @@ -79,6 +80,7 @@ public class ClusterModule extends AbstractModule { new Setting<>("cluster.routing.allocation.type", BALANCED_ALLOCATOR, Function.identity(), Property.NodeScope); public static final List> DEFAULT_ALLOCATION_DECIDERS = Collections.unmodifiableList(Arrays.asList( + MaxRetryAllocationDecider.class, SameShardAllocationDecider.class, FilterAllocationDecider.class, ReplicaAfterPrimaryActiveAllocationDecider.class, diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java index 2670363364d53..d583b77cc96b9 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java @@ -48,7 +48,6 @@ public final class UnassignedInfo implements ToXContent, Writeable { public static final Setting INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING = Setting.timeSetting("index.unassigned.node_left.delayed_timeout", DEFAULT_DELAYED_NODE_LEFT_TIMEOUT, Property.Dynamic, Property.IndexScope); - /** * Reason why the shard is in unassigned state. *

@@ -103,7 +102,11 @@ public enum Reason { /** * A better replica location is identified and causes the existing replica allocation to be cancelled. */ - REALLOCATED_REPLICA; + REALLOCATED_REPLICA, + /** + * Unassigned as a result of a failed primary while the replica was initializing. + */ + PRIMARY_FAILED; } private final Reason reason; @@ -112,6 +115,7 @@ public enum Reason { private final long lastComputedLeftDelayNanos; // how long to delay shard allocation, not serialized (always positive, 0 means no delay) private final String message; private final Throwable failure; + private final int failedAllocations; /** * creates an UnassingedInfo object based **current** time @@ -120,7 +124,7 @@ public enum Reason { * @param message more information about cause. **/ public UnassignedInfo(Reason reason, String message) { - this(reason, message, null, System.nanoTime(), System.currentTimeMillis()); + this(reason, message, null, reason == Reason.ALLOCATION_FAILED ? 1 : 0, System.nanoTime(), System.currentTimeMillis()); } /** @@ -130,13 +134,16 @@ public UnassignedInfo(Reason reason, String message) { * @param unassignedTimeNanos the time to use as the base for any delayed re-assignment calculation * @param unassignedTimeMillis the time of unassignment used to display to in our reporting. */ - public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwable failure, long unassignedTimeNanos, long unassignedTimeMillis) { + public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwable failure, int failedAllocations, long unassignedTimeNanos, long unassignedTimeMillis) { this.reason = reason; this.unassignedTimeMillis = unassignedTimeMillis; this.unassignedTimeNanos = unassignedTimeNanos; this.lastComputedLeftDelayNanos = 0L; this.message = message; this.failure = failure; + this.failedAllocations = failedAllocations; + assert failedAllocations > 0 && reason == Reason.ALLOCATION_FAILED || failedAllocations == 0 && reason != Reason.ALLOCATION_FAILED: + "failedAllocations: " + 0 + " for reason " + reason; assert !(message == null && failure != null) : "provide a message if a failure exception is provided"; } @@ -147,17 +154,19 @@ public UnassignedInfo(UnassignedInfo unassignedInfo, long newComputedLeftDelayNa this.lastComputedLeftDelayNanos = newComputedLeftDelayNanos; this.message = unassignedInfo.message; this.failure = unassignedInfo.failure; + this.failedAllocations = unassignedInfo.failedAllocations; } public UnassignedInfo(StreamInput in) throws IOException { this.reason = Reason.values()[(int) in.readByte()]; this.unassignedTimeMillis = in.readLong(); // As System.nanoTime() cannot be compared across different JVMs, reset it to now. - // This means that in master failover situations, elapsed delay time is forgotten. + // This means that in master fail-over situations, elapsed delay time is forgotten. this.unassignedTimeNanos = System.nanoTime(); this.lastComputedLeftDelayNanos = 0L; this.message = in.readOptionalString(); this.failure = in.readThrowable(); + this.failedAllocations = in.readVInt(); } public void writeTo(StreamOutput out) throws IOException { @@ -166,12 +175,18 @@ public void writeTo(StreamOutput out) throws IOException { // Do not serialize unassignedTimeNanos as System.nanoTime() cannot be compared across different JVMs out.writeOptionalString(message); out.writeThrowable(failure); + out.writeVInt(failedAllocations); } public UnassignedInfo readFrom(StreamInput in) throws IOException { return new UnassignedInfo(in); } + /** + * Retruns the number of previously failed allocations of this shard. + */ + public int getNumFailedAllocations() {return failedAllocations;} + /** * The reason why the shard is unassigned. */ @@ -325,7 +340,11 @@ public String shortSummary() { StringBuilder sb = new StringBuilder(); sb.append("[reason=").append(reason).append("]"); sb.append(", at[").append(DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)).append("]"); + if (failedAllocations > 0) { + sb.append(", failed_attemps[").append(failedAllocations).append("]"); + } String details = getDetails(); + if (details != null) { sb.append(", details[").append(details).append("]"); } @@ -342,6 +361,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject("unassigned_info"); builder.field("reason", reason); builder.field("at", DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)); + if (failedAllocations > 0) { + builder.field("failed_attemps", failedAllocations); + } String details = getDetails(); if (details != null) { builder.field("details", details); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java index e1bbbb7f4ab96..65acbc749cf8b 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java @@ -222,8 +222,10 @@ public RoutingAllocation.Result applyFailedShards(ClusterState clusterState, Lis List orderedFailedShards = new ArrayList<>(failedShards); orderedFailedShards.sort(Comparator.comparing(failedShard -> failedShard.shard.primary())); for (FailedRerouteAllocation.FailedShard failedShard : orderedFailedShards) { + UnassignedInfo unassignedInfo = failedShard.shard.unassignedInfo(); + final int failedAllocations = unassignedInfo != null ? unassignedInfo.getNumFailedAllocations() : 0; changed |= applyFailedShard(allocation, failedShard.shard, true, new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShard.message, failedShard.failure, - System.nanoTime(), System.currentTimeMillis())); + failedAllocations + 1, System.nanoTime(), System.currentTimeMillis())); } if (!changed) { return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); @@ -437,7 +439,7 @@ private boolean deassociateDeadNodes(RoutingAllocation allocation) { // now, go over all the shards routing on the node, and fail them for (ShardRouting shardRouting : node.copyShards()) { UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "node_left[" + node.nodeId() + "]", null, - allocation.getCurrentNanoTime(), System.currentTimeMillis()); + 0, allocation.getCurrentNanoTime(), System.currentTimeMillis()); applyFailedShard(allocation, shardRouting, false, unassignedInfo); } // its a dead node, remove it, note, its important to remove it *after* we apply failed shard @@ -457,8 +459,8 @@ private boolean failReplicasForUnassignedPrimary(RoutingAllocation allocation, S boolean changed = false; for (ShardRouting routing : replicas) { changed |= applyFailedShard(allocation, routing, false, - new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, "primary failed while replica initializing", - null, allocation.getCurrentNanoTime(), System.currentTimeMillis())); + new UnassignedInfo(UnassignedInfo.Reason.PRIMARY_FAILED, "primary failed while replica initializing", + null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis())); } return changed; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java index d4191292cfcbe..c80afde308605 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java @@ -125,7 +125,7 @@ public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) // we need to move the unassigned info back to treat it as if it was index creation unassignedInfoToUpdate = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "force empty allocation from previous reason " + shardRouting.unassignedInfo().getReason() + ", " + shardRouting.unassignedInfo().getMessage(), - shardRouting.unassignedInfo().getFailure(), System.nanoTime(), System.currentTimeMillis()); + shardRouting.unassignedInfo().getFailure(), 0, System.nanoTime(), System.currentTimeMillis()); } initializeUnassignedShard(allocation, routingNodes, routingNode, shardRouting, unassignedInfoToUpdate); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java new file mode 100644 index 0000000000000..bcf25d0156a81 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -0,0 +1,71 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; + +/** + * An allocation decider that prevents shards from being allocated on any node if the shards allocation has been retried N times without + * success. This means if a shard has been INITIALIZING N times in a row without being moved to STARTED the shard will be ignored until + * the setting for index.allocation.max_retry is raised. The default value is 5. + */ +public class MaxRetryAllocationDecider extends AllocationDecider { + + public static final Setting SETTING_ALLOCATION_MAX_RETRY = Setting.intSetting("index.allocation.max_retry", 5, 0, + Setting.Property.Dynamic, Setting.Property.IndexScope); + + public static final String NAME = "max_retry"; + + /** + * Initializes a new {@link MaxRetryAllocationDecider} + * + * @param settings {@link Settings} used by this {@link AllocationDecider} + */ + @Inject + public MaxRetryAllocationDecider(Settings settings) { + super(settings); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { + UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); + if (unassignedInfo != null && unassignedInfo.getNumFailedAllocations() > 0) { + IndexMetaData indexSafe = allocation.metaData().getIndexSafe(shardRouting.index()); + int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexSafe.getSettings()); + if (unassignedInfo.getNumFailedAllocations() >= maxRetry) { + return allocation.decision(Decision.NO, NAME, "shard has already failed allocating [" + + unassignedInfo.getNumFailedAllocations() + "] times"); + } + } + return allocation.decision(Decision.YES, NAME, "shard has no previous failures"); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, allocation); + } +} diff --git a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 1b7952394579d..769c68829c791 100644 --- a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -21,6 +21,7 @@ import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.gateway.PrimaryShardAllocator; @@ -40,7 +41,6 @@ import org.elasticsearch.index.store.FsDirectoryService; import org.elasticsearch.index.store.IndexStore; import org.elasticsearch.index.store.Store; -import org.elasticsearch.index.IndexWarmer; import org.elasticsearch.indices.IndicesRequestCache; import java.util.Arrays; @@ -59,6 +59,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { public static final Predicate INDEX_SETTINGS_KEY_PREDICATE = (s) -> s.startsWith(IndexMetaData.INDEX_SETTING_PREFIX); public static final Set> BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY, IndexSettings.INDEX_TTL_DISABLE_PURGE_SETTING, IndexStore.INDEX_STORE_THROTTLE_TYPE_SETTING, IndexStore.INDEX_STORE_THROTTLE_MAX_BYTES_PER_SEC_SETTING, diff --git a/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java b/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java index d2e3d7f42cff7..8b6e425c26a31 100644 --- a/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java +++ b/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java @@ -108,7 +108,7 @@ public boolean processExistingRecoveries(RoutingAllocation allocation) { currentNode, nodeWithHighestMatch); it.moveToUnassigned(new UnassignedInfo(UnassignedInfo.Reason.REALLOCATED_REPLICA, "existing allocation of replica to [" + currentNode + "] cancelled, sync id match found on node [" + nodeWithHighestMatch + "]", - null, allocation.getCurrentNanoTime(), System.currentTimeMillis())); + null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis())); changed = true; } } diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java index 708f8ca50790a..f46224570b0c9 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java @@ -64,7 +64,8 @@ public void testReasonOrdinalOrder() { UnassignedInfo.Reason.NODE_LEFT, UnassignedInfo.Reason.REROUTE_CANCELLED, UnassignedInfo.Reason.REINITIALIZED, - UnassignedInfo.Reason.REALLOCATED_REPLICA}; + UnassignedInfo.Reason.REALLOCATED_REPLICA, + UnassignedInfo.Reason.PRIMARY_FAILED}; for (int i = 0; i < order.length; i++) { assertThat(order[i].ordinal(), equalTo(i)); } @@ -72,7 +73,10 @@ public void testReasonOrdinalOrder() { } public void testSerialization() throws Exception { - UnassignedInfo meta = new UnassignedInfo(RandomPicks.randomFrom(random(), UnassignedInfo.Reason.values()), randomBoolean() ? randomAsciiOfLength(4) : null); + UnassignedInfo.Reason reason = RandomPicks.randomFrom(random(), UnassignedInfo.Reason.values()); + UnassignedInfo meta = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? + new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null, null, randomIntBetween(1, 100), System.nanoTime(), System.currentTimeMillis()): + new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null); BytesStreamOutput out = new BytesStreamOutput(); meta.writeTo(out); out.close(); @@ -82,6 +86,7 @@ public void testSerialization() throws Exception { assertThat(read.getUnassignedTimeInMillis(), equalTo(meta.getUnassignedTimeInMillis())); assertThat(read.getMessage(), equalTo(meta.getMessage())); assertThat(read.getDetails(), equalTo(meta.getDetails())); + assertThat(read.getNumFailedAllocations(), equalTo(meta.getNumFailedAllocations())); } public void testIndexCreated() { @@ -273,7 +278,10 @@ public void testUnassignedDelayedOnlyOnNodeLeft() throws Exception { public void testUnassignedDelayOnlyNodeLeftNonNodeLeftReason() throws Exception { EnumSet reasons = EnumSet.allOf(UnassignedInfo.Reason.class); reasons.remove(UnassignedInfo.Reason.NODE_LEFT); - UnassignedInfo unassignedInfo = new UnassignedInfo(RandomPicks.randomFrom(random(), reasons), null); + UnassignedInfo.Reason reason = RandomPicks.randomFrom(random(), reasons); + UnassignedInfo unassignedInfo = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? + new UnassignedInfo(reason, null, null, 1, System.nanoTime(), System.currentTimeMillis()): + new UnassignedInfo(reason, null); unassignedInfo = unassignedInfo.updateDelay(unassignedInfo.getUnassignedTimeInNanos() + 1, // add 1 tick delay Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "10h").build(), Settings.EMPTY); long delay = unassignedInfo.getLastComputedLeftDelayNanos(); @@ -287,7 +295,7 @@ public void testUnassignedDelayOnlyNodeLeftNonNodeLeftReason() throws Exception */ public void testLeftDelayCalculation() throws Exception { final long baseTime = System.nanoTime(); - UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, baseTime, System.currentTimeMillis()); + UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, 0, baseTime, System.currentTimeMillis()); final long totalDelayNanos = TimeValue.timeValueMillis(10).nanos(); final Settings settings = Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), TimeValue.timeValueNanos(totalDelayNanos)).build(); unassignedInfo = unassignedInfo.updateDelay(baseTime, settings, Settings.EMPTY); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java new file mode 100644 index 0000000000000..a883be6f1b4fe --- /dev/null +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java @@ -0,0 +1,139 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing.allocation; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.test.ESAllocationTestCase; +import org.elasticsearch.test.gateway.NoopGatewayAllocator; + +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; +import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; +import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; + +public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase { + + public void testFailedAllocation() { + AllocationService strategy = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, + Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), + NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); + MetaData.Builder metaBuilder = MetaData.builder(); + metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); + MetaData metaData = metaBuilder.build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + routingTableBuilder.addAsNew(metaData.index("idx")); + + RoutingTable routingTable = routingTableBuilder.build(); + ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) + .metaData(metaData).routingTable(routingTable).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) + .build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = strategy.reroute(clusterState, "reroute").routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + assertEquals(prevRoutingTable.index("idx").shards().size(), 1); + assertEquals(prevRoutingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + // now fail it 4 times - 5 retries is default + for (int i = 0; i < 4; i++) { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); + } + // now we go and check that we are actually stick to unassigned on the next failure + { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), 5); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + } + + // change the settings and ensure we can do another round of allocation for that index. + clusterState = ClusterState.builder(clusterState).routingTable(routingTable) + .metaData(MetaData.builder(clusterState.metaData()) + .put(IndexMetaData.builder(clusterState.metaData().index("idx")).settings( + Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retry", 6).build() + ).build(), true).build()).build(); + RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed"); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + // good we are initializing and we are maintaining failure information + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), 5); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + // now we start the shard + routingTable = strategy.applyStartedShards(clusterState, Collections.singletonList(routingTable.index("idx") + .shard(0).shards().get(0))).routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + // all counters have been reset to 0 ie. no unassigned info + assertEquals(routingTable.index("idx").shards().size(), 1); + assertNull(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo()); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), STARTED); + + // now fail again and see if it has a new counter + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "ZOOOMG", + new UnsupportedOperationException())); + result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "ZOOOMG"); + + + } +} diff --git a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index 5ed63b519a43c..61acb81fa96cf 100644 --- a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -640,7 +640,7 @@ public void testDataFileFailureDuringRestore() throws Exception { assertAcked(client.admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", repositoryLocation))); - createIndex("test-idx"); + prepareCreate("test-idx").setSettings(Settings.builder().put("index.allocation.max_retry", Integer.MAX_VALUE)); ensureGreen(); logger.info("--> indexing some data"); From 21887c1d1db3157f465fad719a91b03124ac9da3 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 19 May 2016 17:41:57 +0200 Subject: [PATCH 02/12] apply feedback from @dakrone --- .../org/elasticsearch/cluster/routing/UnassignedInfo.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java index d583b77cc96b9..626a062588a72 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java @@ -143,7 +143,7 @@ public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwab this.failure = failure; this.failedAllocations = failedAllocations; assert failedAllocations > 0 && reason == Reason.ALLOCATION_FAILED || failedAllocations == 0 && reason != Reason.ALLOCATION_FAILED: - "failedAllocations: " + 0 + " for reason " + reason; + "failedAllocations: " + failedAllocations + " for reason " + reason; assert !(message == null && failure != null) : "provide a message if a failure exception is provided"; } @@ -341,7 +341,7 @@ public String shortSummary() { sb.append("[reason=").append(reason).append("]"); sb.append(", at[").append(DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)).append("]"); if (failedAllocations > 0) { - sb.append(", failed_attemps[").append(failedAllocations).append("]"); + sb.append(", failed_attempts[").append(failedAllocations).append("]"); } String details = getDetails(); @@ -362,7 +362,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field("reason", reason); builder.field("at", DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)); if (failedAllocations > 0) { - builder.field("failed_attemps", failedAllocations); + builder.field("failed_attempts", failedAllocations); } String details = getDetails(); if (details != null) { From b8d56c7780bfc24c18595c107ba2643c1f17bc3d Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 19 May 2016 17:46:22 +0200 Subject: [PATCH 03/12] fix test --- .../elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index 61acb81fa96cf..3dbcedf3ab166 100644 --- a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -640,7 +640,7 @@ public void testDataFileFailureDuringRestore() throws Exception { assertAcked(client.admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", repositoryLocation))); - prepareCreate("test-idx").setSettings(Settings.builder().put("index.allocation.max_retry", Integer.MAX_VALUE)); + prepareCreate("test-idx").setSettings(Settings.builder().put("index.allocation.max_retry", Integer.MAX_VALUE)).get(); ensureGreen(); logger.info("--> indexing some data"); From cb25e791a3e11bb3870118e8d566ed12a694fcd1 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 19 May 2016 17:50:26 +0200 Subject: [PATCH 04/12] * Rename max_retry to max_retries * append unassigned info to allocaiton decision. --- .../routing/allocation/decider/MaxRetryAllocationDecider.java | 4 ++-- .../routing/allocation/MaxRetryAllocationDeciderTests.java | 2 +- .../snapshots/SharedClusterSnapshotRestoreIT.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java index bcf25d0156a81..553c94c1df7ed 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -35,7 +35,7 @@ */ public class MaxRetryAllocationDecider extends AllocationDecider { - public static final Setting SETTING_ALLOCATION_MAX_RETRY = Setting.intSetting("index.allocation.max_retry", 5, 0, + public static final Setting SETTING_ALLOCATION_MAX_RETRY = Setting.intSetting("index.allocation.max_retries", 5, 0, Setting.Property.Dynamic, Setting.Property.IndexScope); public static final String NAME = "max_retry"; @@ -58,7 +58,7 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocat int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexSafe.getSettings()); if (unassignedInfo.getNumFailedAllocations() >= maxRetry) { return allocation.decision(Decision.NO, NAME, "shard has already failed allocating [" - + unassignedInfo.getNumFailedAllocations() + "] times"); + + unassignedInfo.getNumFailedAllocations() + "] times " + unassignedInfo.toString()); } } return allocation.decision(Decision.YES, NAME, "shard has no previous failures"); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java index a883be6f1b4fe..1e89a6ecca819 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java @@ -99,7 +99,7 @@ public void testFailedAllocation() { clusterState = ClusterState.builder(clusterState).routingTable(routingTable) .metaData(MetaData.builder(clusterState.metaData()) .put(IndexMetaData.builder(clusterState.metaData().index("idx")).settings( - Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retry", 6).build() + Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", 6).build() ).build(), true).build()).build(); RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed"); assertTrue(result.changed()); diff --git a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index 3dbcedf3ab166..7ca30132a4a6b 100644 --- a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -640,7 +640,7 @@ public void testDataFileFailureDuringRestore() throws Exception { assertAcked(client.admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", repositoryLocation))); - prepareCreate("test-idx").setSettings(Settings.builder().put("index.allocation.max_retry", Integer.MAX_VALUE)).get(); + prepareCreate("test-idx").setSettings(Settings.builder().put("index.allocation.max_retries", Integer.MAX_VALUE)).get(); ensureGreen(); logger.info("--> indexing some data"); From 0aef3d15b55c1f6b21125864a0dcb86697d7542d Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 19 May 2016 17:52:39 +0200 Subject: [PATCH 05/12] simplify assertion --- .../org/elasticsearch/cluster/routing/UnassignedInfo.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java index 626a062588a72..bc44cd1701c93 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java @@ -142,7 +142,7 @@ public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwab this.message = message; this.failure = failure; this.failedAllocations = failedAllocations; - assert failedAllocations > 0 && reason == Reason.ALLOCATION_FAILED || failedAllocations == 0 && reason != Reason.ALLOCATION_FAILED: + assert (failedAllocations > 0) == (reason == Reason.ALLOCATION_FAILED): "failedAllocations: " + failedAllocations + " for reason " + reason; assert !(message == null && failure != null) : "provide a message if a failure exception is provided"; } @@ -183,9 +183,9 @@ public UnassignedInfo readFrom(StreamInput in) throws IOException { } /** - * Retruns the number of previously failed allocations of this shard. + * Returns the number of previously failed allocations of this shard. */ - public int getNumFailedAllocations() {return failedAllocations;} + public int getNumFailedAllocations() { return failedAllocations; } /** * The reason why the shard is unassigned. From 4a4436bb81da426a9167d7ae89dc593de0d28cc5 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2016 09:10:33 +0200 Subject: [PATCH 06/12] apply feedback --- .../allocation/decider/MaxRetryAllocationDecider.java | 7 ++++--- .../allocation/MaxRetryAllocationDeciderTests.java | 11 ++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java index 553c94c1df7ed..5ca83715a28d0 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -54,11 +54,12 @@ public MaxRetryAllocationDecider(Settings settings) { public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); if (unassignedInfo != null && unassignedInfo.getNumFailedAllocations() > 0) { - IndexMetaData indexSafe = allocation.metaData().getIndexSafe(shardRouting.index()); - int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexSafe.getSettings()); + final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index()); + final int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexMetaData.getSettings()); if (unassignedInfo.getNumFailedAllocations() >= maxRetry) { return allocation.decision(Decision.NO, NAME, "shard has already failed allocating [" - + unassignedInfo.getNumFailedAllocations() + "] times " + unassignedInfo.toString()); + + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed" + + unassignedInfo.toString() + " raise [" + SETTING_ALLOCATION_MAX_RETRY.getKey() + "] to retry"); } } return allocation.decision(Decision.YES, NAME, "shard has no previous failures"); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java index 1e89a6ecca819..def9eff5caabd 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java @@ -66,8 +66,9 @@ public void testFailedAllocation() { assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); - // now fail it 4 times - 5 retries is default - for (int i = 0; i < 4; i++) { + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries-1; i++) { List failedShards = Collections.singletonList( new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException())); @@ -90,7 +91,7 @@ public void testFailedAllocation() { routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(routingTable.index("idx").shards().size(), 1); - assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), 5); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); } @@ -99,7 +100,7 @@ public void testFailedAllocation() { clusterState = ClusterState.builder(clusterState).routingTable(routingTable) .metaData(MetaData.builder(clusterState.metaData()) .put(IndexMetaData.builder(clusterState.metaData().index("idx")).settings( - Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", 6).build() + Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", retries+1).build() ).build(), true).build()).build(); RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed"); assertTrue(result.changed()); @@ -107,7 +108,7 @@ public void testFailedAllocation() { clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); // good we are initializing and we are maintaining failure information assertEquals(routingTable.index("idx").shards().size(), 1); - assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), 5); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); From 6e882238885da90bfe6ed67b822417d8506427c0 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2016 09:14:07 +0200 Subject: [PATCH 07/12] fix line length in test --- .../routing/allocation/MaxRetryAllocationDeciderTests.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java index def9eff5caabd..a16c67af243f8 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java @@ -100,7 +100,8 @@ public void testFailedAllocation() { clusterState = ClusterState.builder(clusterState).routingTable(routingTable) .metaData(MetaData.builder(clusterState.metaData()) .put(IndexMetaData.builder(clusterState.metaData().index("idx")).settings( - Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", retries+1).build() + Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", + retries+1).build() ).build(), true).build()).build(); RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed"); assertTrue(result.changed()); @@ -134,7 +135,5 @@ public void testFailedAllocation() { assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "ZOOOMG"); - - } } From 2544fec8c509c64a245bee063294ece4c04bdc33 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2016 10:46:30 +0200 Subject: [PATCH 08/12] add retry_failed=true|false flag to reroute API and add documentation --- ...ansportClusterAllocationExplainAction.java | 2 +- .../reroute/ClusterRerouteRequest.java | 18 +++++ .../reroute/ClusterRerouteRequestBuilder.java | 9 +++ .../TransportClusterRerouteAction.java | 4 +- .../routing/allocation/AllocationService.java | 12 ++- .../allocation/FailedRerouteAllocation.java | 2 +- .../routing/allocation/RoutingAllocation.java | 9 ++- .../allocation/StartedRerouteAllocation.java | 2 +- .../decider/MaxRetryAllocationDecider.java | 19 ++++- .../reroute/RestClusterRerouteAction.java | 1 + .../allocation/AllocationCommandsTests.java | 46 +++++------ .../allocation/AwarenessAllocationTests.java | 2 +- .../allocation/DeadNodesAllocationTests.java | 8 +- .../ExpectedShardSizeAllocationTests.java | 2 +- .../allocation/FailedShardsRoutingTests.java | 8 +- .../MaxRetryAllocationDeciderTests.java | 81 +++++++++++++++++-- .../NodeVersionAllocationDeciderTests.java | 4 +- .../allocation/ThrottlingAllocationTests.java | 6 +- .../decider/DiskThresholdDeciderTests.java | 14 ++-- .../DiskThresholdDeciderUnitTests.java | 4 +- .../gateway/PrimaryShardAllocatorTests.java | 18 ++--- .../gateway/ReplicaShardAllocatorTests.java | 4 +- .../indices/state/RareClusterStateIT.java | 2 +- docs/reference/cluster/reroute.asciidoc | 12 +++ .../rest-api-spec/api/cluster.reroute.json | 4 + 25 files changed, 214 insertions(+), 79 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java index 28b62083d42da..f986a5679a943 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java @@ -250,7 +250,7 @@ protected void masterOperation(final ClusterAllocationExplainRequest request, fi final ActionListener listener) { final RoutingNodes routingNodes = state.getRoutingNodes(); final RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, state, - clusterInfoService.getClusterInfo(), System.nanoTime()); + clusterInfoService.getClusterInfo(), System.nanoTime(), false); ShardRouting foundShard = null; if (request.useAnyUnassignedShard()) { diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java index a241f01ea28cc..2a38315f635ea 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java @@ -41,6 +41,7 @@ public class ClusterRerouteRequest extends AcknowledgedRequestfalse). If true, the + * request will retry allocating shards that are currently can't be allocated due to too many allocation failures. + */ + public ClusterRerouteRequest setRetryFailed(boolean retryFailed) { + this.retryFailed = retryFailed; + return this; + } + /** * Returns the current explain flag */ @@ -88,6 +98,14 @@ public boolean explain() { return this.explain; } + /** + * Returns the current retry failed flag + */ + public boolean isRetryFailed() { + return this.retryFailed; + } + + /** * Set the allocation commands to execute. */ diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java index 7f0ad81a00dbb..2ae0570a6b8d8 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java @@ -60,6 +60,15 @@ public ClusterRerouteRequestBuilder setExplain(boolean explain) { return this; } + /** + * Sets the retry failed flag (defaults to false). If true, the + * request will retry allocating shards that are currently can't be allocated due to too many allocation failures. + */ + public ClusterRerouteRequestBuilder setRetryFailed(boolean retryFailed) { + request.setRetryFailed(retryFailed); + return this; + } + /** * Sets the commands for the request to execute. */ diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java index e6116dbfbc41e..2a834c3fcfae8 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java @@ -91,7 +91,7 @@ public void onFailure(String source, Throwable t) { @Override public ClusterState execute(ClusterState currentState) { - RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.commands, request.explain()); + RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.commands, request.explain(), true); ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); clusterStateToSend = newState; explanations = routingResult.explanations(); @@ -102,4 +102,4 @@ public ClusterState execute(ClusterState currentState) { } }); } -} \ No newline at end of file +} diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java index 65acbc749cf8b..d59113675d8ff 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java @@ -259,16 +259,13 @@ private String firstListElementsToCommaDelimitedString(List elements, Fun .collect(Collectors.joining(", ")); } - public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands) { - return reroute(clusterState, commands, false); - } - - public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands, boolean explain) { + public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands, boolean explain, boolean retryFailed) { RoutingNodes routingNodes = getMutableRoutingNodes(clusterState); // we don't shuffle the unassigned shards here, to try and get as close as possible to // a consistent result of the effect the commands have on the routing // this allows systems to dry run the commands, see the resulting cluster state, and act on it - RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, clusterInfoService.getClusterInfo(), currentNanoTime()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, + clusterInfoService.getClusterInfo(), currentNanoTime(), retryFailed); // don't short circuit deciders, we want a full explanation allocation.debugDecision(true); // we ignore disable allocation, because commands are explicit @@ -307,7 +304,8 @@ protected RoutingAllocation.Result reroute(ClusterState clusterState, String rea RoutingNodes routingNodes = getMutableRoutingNodes(clusterState); // shuffle the unassigned nodes, just so we won't have things like poison failed shards routingNodes.unassigned().shuffle(); - RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, clusterInfoService.getClusterInfo(), currentNanoTime()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, + clusterInfoService.getClusterInfo(), currentNanoTime(), false); allocation.debugDecision(debug); if (!reroute(allocation)) { return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java index a13862fed260a..ef2e42eed764c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java @@ -58,7 +58,7 @@ public String toString() { private final List failedShards; public FailedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, List failedShards, ClusterInfo clusterInfo) { - super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime()); + super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime(), false); this.failedShards = failedShards; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java index 60ca3a8d5fd45..0df8074e14c3a 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java @@ -134,6 +134,8 @@ public RoutingExplanations explanations() { private boolean ignoreDisable = false; + private final boolean retryFailed; + private boolean debugDecision = false; private boolean hasPendingAsyncFetch = false; @@ -148,7 +150,7 @@ public RoutingExplanations explanations() { * @param clusterState cluster state before rerouting * @param currentNanoTime the nano time to use for all delay allocation calculation (typically {@link System#nanoTime()}) */ - public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, ClusterInfo clusterInfo, long currentNanoTime) { + public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, ClusterInfo clusterInfo, long currentNanoTime, boolean retryFailed) { this.deciders = deciders; this.routingNodes = routingNodes; this.metaData = clusterState.metaData(); @@ -156,6 +158,7 @@ public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, this.customs = clusterState.customs(); this.clusterInfo = clusterInfo; this.currentNanoTime = currentNanoTime; + this.retryFailed = retryFailed; } /** returns the nano time captured at the beginning of the allocation. used to make sure all time based decisions are aligned */ @@ -297,4 +300,8 @@ public boolean hasPendingAsyncFetch() { public void setHasPendingAsyncFetch() { this.hasPendingAsyncFetch = true; } + + public boolean isRetryFailed() { + return retryFailed; + } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java index e9570edd9c3bd..0f55ab4fda1ae 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java @@ -36,7 +36,7 @@ public class StartedRerouteAllocation extends RoutingAllocation { private final List startedShards; public StartedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, List startedShards, ClusterInfo clusterInfo) { - super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime()); + super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime(), false); this.startedShards = startedShards; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java index 5ca83715a28d0..31b8ea940e2be 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -32,6 +32,10 @@ * An allocation decider that prevents shards from being allocated on any node if the shards allocation has been retried N times without * success. This means if a shard has been INITIALIZING N times in a row without being moved to STARTED the shard will be ignored until * the setting for index.allocation.max_retry is raised. The default value is 5. + * Note: This allocation decider also allows allocation of repeatedly failing shards when the /_cluster/reroute?retry_failed=true + * API is manually invoked. This allows single retries without raising the limits. + * + * @see RoutingAllocation#isRetryFailed() */ public class MaxRetryAllocationDecider extends AllocationDecider { @@ -53,13 +57,22 @@ public MaxRetryAllocationDecider(Settings settings) { @Override public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); - if (unassignedInfo != null && unassignedInfo.getNumFailedAllocations() > 0) { + if (unassignedInfo != null && unassignedInfo.getNumFailedAllocations() > 0 + || allocation.isRetryFailed() // if we are called via the _reroute API we ignore the failure counter and try to allocate + // this improves the usability since people don't need to raise the limits to issue retries since a simple _reroute call is + // enough to manually retry. + ) { final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index()); final int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexMetaData.getSettings()); + if (allocation.isRetryFailed()) { //manual allocation - retry + return allocation.decision(Decision.YES, NAME, "shard has already failed allocating [" + + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + + unassignedInfo.toString() + " - retrying once on manual allocation"); + } if (unassignedInfo.getNumFailedAllocations() >= maxRetry) { return allocation.decision(Decision.NO, NAME, "shard has already failed allocating [" - + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed" - + unassignedInfo.toString() + " raise [" + SETTING_ALLOCATION_MAX_RETRY.getKey() + "] to retry"); + + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + + unassignedInfo.toString() + " - manually call [/_cluster/reroute?retry_failed=true] to retry"); } } return allocation.decision(Decision.YES, NAME, "shard has no previous failures"); diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java index 8a4afd89ac485..b34c6726c0986 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java @@ -64,6 +64,7 @@ public RestClusterRerouteAction(Settings settings, RestController controller, Cl public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) throws Exception { final ClusterRerouteRequest clusterRerouteRequest = Requests.clusterRerouteRequest(); clusterRerouteRequest.dryRun(request.paramAsBoolean("dry_run", clusterRerouteRequest.dryRun())); + clusterRerouteRequest.setRetryFailed(request.paramAsBoolean("retry_failed", clusterRerouteRequest.isRetryFailed())); clusterRerouteRequest.explain(request.paramAsBoolean("explain", clusterRerouteRequest.explain())); clusterRerouteRequest.timeout(request.paramAsTime("timeout", clusterRerouteRequest.timeout())); clusterRerouteRequest.masterNodeTimeout(request.paramAsTime("master_timeout", clusterRerouteRequest.masterNodeTimeout())); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java index 7aa8576ece340..b63692e0d2a51 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java @@ -94,7 +94,7 @@ public void testMoveShardCommand() { } else { toNodeId = "node1"; } - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().state(), equalTo(ShardRoutingState.RELOCATING)); @@ -148,7 +148,7 @@ public void testAllocateCommand() { logger.info("--> allocating to non-existent node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node42"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node42")), false, false); fail("expected IllegalArgumentException when allocating to non-existing node"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("failed to resolve [node42], no matching nodes")); @@ -156,7 +156,7 @@ public void testAllocateCommand() { logger.info("--> allocating to non-data node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node4"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node4")), false, false); fail("expected IllegalArgumentException when allocating to non-data node"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocation can only be done on data nodes")); @@ -164,7 +164,7 @@ public void testAllocateCommand() { logger.info("--> allocating non-existing shard, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 1, "node2"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 1, "node2")), false, false); fail("expected ShardNotFoundException when allocating non-existing shard"); } catch (ShardNotFoundException e) { assertThat(e.getMessage(), containsString("no such shard")); @@ -172,7 +172,7 @@ public void testAllocateCommand() { logger.info("--> allocating non-existing index, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test2", 0, "node2"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test2", 0, "node2")), false, false); fail("expected ShardNotFoundException when allocating non-existing index"); } catch (IndexNotFoundException e) { assertThat(e.getMessage(), containsString("no such index")); @@ -180,7 +180,7 @@ public void testAllocateCommand() { logger.info("--> allocating empty primary with acceptDataLoss flag set to false"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", false)), false, false); fail("expected IllegalArgumentException when allocating empty primary with acceptDataLoss flag set to false"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocating an empty primary for " + shardId + " can result in data loss. Please confirm by setting the accept_data_loss parameter to true")); @@ -188,14 +188,14 @@ public void testAllocateCommand() { logger.info("--> allocating stale primary with acceptDataLoss flag set to false"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateStalePrimaryAllocationCommand(index, shardId.id(), "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateStalePrimaryAllocationCommand(index, shardId.id(), "node1", false)), false, false); fail("expected IllegalArgumentException when allocating stale primary with acceptDataLoss flag set to false"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocating an empty primary for " + shardId + " can result in data loss. Please confirm by setting the accept_data_loss parameter to true")); } logger.info("--> allocating empty primary with acceptDataLoss flag set to true"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -211,13 +211,13 @@ public void testAllocateCommand() { logger.info("--> allocate the replica shard on the primary shard node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node1"))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node1")), false, false); fail("expected IllegalArgumentException when allocating replica shard on the primary shard node"); } catch (IllegalArgumentException e) { } logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -236,7 +236,7 @@ public void testAllocateCommand() { logger.info("--> verify that we fail when there are no unassigned shards"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 0, "node3"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 0, "node3")), false, false); fail("expected IllegalArgumentException when allocating shard while no unassigned shard available"); } catch (IllegalArgumentException e) { } @@ -268,7 +268,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(0)); logger.info("--> allocating empty primary shard with accept_data_loss flag set to true"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -277,7 +277,7 @@ public void testCancelCommand() { logger.info("--> cancel primary allocation, make sure it fails..."); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } @@ -291,13 +291,13 @@ public void testCancelCommand() { logger.info("--> cancel primary allocation, make sure it fails..."); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -306,7 +306,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> cancel the relocation allocation"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -315,7 +315,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -325,7 +325,7 @@ public void testCancelCommand() { logger.info("--> cancel the primary being replicated, make sure it fails"); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } @@ -339,7 +339,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); logger.info("--> cancel allocation of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -348,7 +348,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(rerouteResult.changed(), equalTo(true)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -364,7 +364,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); logger.info("--> move the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3")), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); @@ -374,7 +374,7 @@ public void testCancelCommand() { assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> cancel the move of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false)), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); @@ -383,7 +383,7 @@ public void testCancelCommand() { logger.info("--> cancel the primary allocation (with allow_primary set to true)"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(rerouteResult.changed(), equalTo(true)); logger.error(clusterState.prettyPrint()); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java index ee993bf3ebd3a..805ab0321ba92 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java @@ -868,7 +868,7 @@ public void testUnassignedShardsWithUnbalancedZones() { } commands.add(new MoveAllocationCommand("test", 0, primaryNode, "A-4")); - routingTable = strategy.reroute(clusterState, commands).routingTable(); + routingTable = strategy.reroute(clusterState, commands, false, false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(0)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java index 11b78d2ae6aa5..b14aeca890e9d 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java @@ -149,8 +149,8 @@ public void testDeadNodeWhileRelocatingOnToNode() { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); @@ -223,8 +223,8 @@ public void testDeadNodeWhileRelocatingOnFromNode() { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test",0 , clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test",0 , clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java index bfa27a36d8b1c..29644f079446e 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java @@ -149,7 +149,7 @@ public void addListener(Listener listener) { } else { toNodeId = "node1"; } - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertEquals(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().state(), ShardRoutingState.RELOCATING); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java index 58e2397b04333..e859c5811c39f 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -109,8 +109,8 @@ public void testFailedShardPrimaryRelocatingToAndFrom() { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); @@ -125,8 +125,8 @@ public void testFailedShardPrimaryRelocatingToAndFrom() { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java index a16c67af243f8..f76851cfef93c 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java @@ -27,6 +27,9 @@ import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.command.AllocateReplicaAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands; import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.common.settings.Settings; @@ -42,10 +45,17 @@ public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase { - public void testFailedAllocation() { - AllocationService strategy = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, - Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), + private AllocationService strategy; + + @Override + public void setUp() throws Exception { + super.setUp(); + strategy = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, + Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); + } + + private ClusterState createInitialClusterState() { MetaData.Builder metaBuilder = MetaData.builder(); metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); MetaData metaData = metaBuilder.build(); @@ -58,7 +68,7 @@ public void testFailedAllocation() { clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) .build(); RoutingTable prevRoutingTable = routingTable; - routingTable = strategy.reroute(clusterState, "reroute").routingTable(); + routingTable = strategy.reroute(clusterState, "reroute", false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertEquals(prevRoutingTable.index("idx").shards().size(), 1); @@ -66,6 +76,67 @@ public void testFailedAllocation() { assertEquals(routingTable.index("idx").shards().size(), 1); assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + return clusterState; + } + + public void testSingleRetryOnIgnore() { + ClusterState clusterState = createInitialClusterState(); + RoutingTable routingTable = clusterState.routingTable(); + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries-1; i++) { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); + } + // now we go and check that we are actually stick to unassigned on the next failure + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + result = strategy.reroute(clusterState, new AllocationCommands(), false, true); // manual reroute should retry once + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + // now we go and check that we are actually stick to unassigned on the next failure ie. no retry + failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + } + + public void testFailedAllocation() { + ClusterState clusterState = createInitialClusterState(); + RoutingTable routingTable = clusterState.routingTable(); final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); // now fail it N-1 times for (int i = 0; i < retries-1; i++) { @@ -103,7 +174,7 @@ public void testFailedAllocation() { Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", retries+1).build() ).build(), true).build()).build(); - RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed"); + RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed", false); assertTrue(result.changed()); routingTable = result.routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java index 97a3003ab2fd6..d0fc64b4b6b42 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java @@ -337,7 +337,7 @@ public void testRebalanceDoesNotAllocatePrimaryAndReplicasOnDifferentVersionNode AllocationService strategy = new MockAllocationService(Settings.EMPTY, allocationDeciders, NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); - RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true); + RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true, false); // the two indices must stay as is, the replicas cannot move to oldNode2 because versions don't match state = ClusterState.builder(state).routingResult(result).build(); assertThat(result.routingTable().index(shard2.getIndex()).shardsWithState(ShardRoutingState.RELOCATING).size(), equalTo(0)); @@ -369,7 +369,7 @@ public void testRestoreDoesNotAllocateSnapshotOnOlderNodes() { AllocationService strategy = new MockAllocationService(Settings.EMPTY, allocationDeciders, NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); - RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true); + RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true, false); // Make sure that primary shards are only allocated on the new node for (int i = 0; i < numberOfShards; i++) { diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java index 28d916e20c15f..61a72bc352a87 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java @@ -283,7 +283,7 @@ public void testOutgoingThrottlesAllocaiton() { assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node2"), 0); assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), 0); - RoutingAllocation.Result reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node1").iterator().next().shardId().id(), "node1", "node2"))); + RoutingAllocation.Result reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node1").iterator().next().shardId().id(), "node1", "node2")), false, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.YES); routingTable = reroute.routingTable(); @@ -296,7 +296,7 @@ public void testOutgoingThrottlesAllocaiton() { assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), 0); // outgoing throttles - reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node1")), true); + reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node1")), true, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.THROTTLE); assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), 0); @@ -311,7 +311,7 @@ public void testOutgoingThrottlesAllocaiton() { assertThat(routingTable.shardsWithState(UNASSIGNED).size(), equalTo(0)); // incoming throttles - reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node2")), true); + reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node2")), true, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.THROTTLE); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java index 2c4e86ad4b1d5..579e87150a7de 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java @@ -796,7 +796,7 @@ public void addListener(Listener listener) { AllocationCommand relocate1 = new MoveAllocationCommand("test", 0, "node2", "node3"); AllocationCommands cmds = new AllocationCommands(relocate1); - routingTable = strategy.reroute(clusterState, cmds).routingTable(); + routingTable = strategy.reroute(clusterState, cmds, false, false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); @@ -808,7 +808,7 @@ public void addListener(Listener listener) { // node3, which will put it over the low watermark when it // completes, with shard relocations taken into account this should // throw an exception about not being able to complete - strategy.reroute(clusterState, cmds).routingTable(); + strategy.reroute(clusterState, cmds, false, false).routingTable(); fail("should not have been able to reroute the shard"); } catch (IllegalArgumentException e) { assertThat("can't allocated because there isn't enough room: " + e.getMessage(), @@ -876,7 +876,7 @@ public void testCanRemainWithShardRelocatingAway() { ); ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, - System.nanoTime()); + System.nanoTime(), false); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); @@ -896,7 +896,8 @@ public void testCanRemainWithShardRelocatingAway() { ) ); clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); - routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime()); + routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), + false); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); @@ -992,7 +993,7 @@ public void testForSingleDataNode() { ); ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, - System.nanoTime()); + System.nanoTime(), false); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); // Two shards should start happily @@ -1051,7 +1052,8 @@ public void addListener(Listener listener) { ); clusterState = ClusterState.builder(updateClusterState).routingTable(builder.build()).build(); - routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime()); + routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), + false); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java index d9e9fb954458b..008884cbb8d69 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java @@ -136,7 +136,7 @@ public void testCanAllocateUsesMaxAvailableSpace() { ImmutableOpenMap.Builder shardSizes = ImmutableOpenMap.builder(); shardSizes.put("[test][0][p]", 10L); // 10 bytes final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), ImmutableOpenMap.of()); - RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); assertEquals(mostAvailableUsage.toString(), Decision.YES, decider.canAllocate(test_0, new RoutingNode("node_0", node_0), allocation)); assertEquals(mostAvailableUsage.toString(), Decision.NO, decider.canAllocate(test_0, new RoutingNode("node_1", node_1), allocation)); } @@ -204,7 +204,7 @@ public void testCanRemainUsesLeastAvailableSpace() { shardSizes.put("[test][2][p]", 10L); final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), shardRoutingMap.build()); - RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); assertEquals(Decision.YES, decider.canRemain(test_0, new RoutingNode("node_0", node_0), allocation)); assertEquals(Decision.NO, decider.canRemain(test_1, new RoutingNode("node_1", node_1), allocation)); try { diff --git a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java index 01e717e6fa95e..ee1cf7280e787 100644 --- a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java @@ -346,7 +346,7 @@ private RoutingAllocation getRestoreRoutingAllocation(AllocationDeciders allocat .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } /** @@ -425,7 +425,7 @@ private RoutingAllocation getRecoverOnAnyNodeRoutingAllocation(AllocationDecider .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } /** @@ -444,7 +444,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndex() { .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -452,7 +452,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndex() { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node1, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -460,7 +460,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndex() { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node2, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); @@ -485,7 +485,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndexWithDifferentVersion( .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -493,7 +493,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndexWithDifferentVersion( assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node1, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -501,7 +501,7 @@ public void testEnoughCopiesFoundForAllocationOnLegacyIndexWithDifferentVersion( assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node2, 2, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); @@ -525,7 +525,7 @@ private RoutingAllocation routingAllocationWithOnePrimaryNoReplicas(AllocationDe .metaData(metaData) .routingTable(routingTableBuilder.build()) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } class TestAllocator extends PrimaryShardAllocator { diff --git a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java index 672c9de3d3e71..20eb6286813d9 100644 --- a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java @@ -302,7 +302,7 @@ private RoutingAllocation onePrimaryOnNode1And1Replica(AllocationDeciders decide .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime(), false); } private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDeciders deciders) { @@ -324,7 +324,7 @@ private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDecid .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime(), false); } class TestAllocator extends ReplicaShardAllocator { diff --git a/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java b/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java index dc53373788685..81c50cc4f9cf4 100644 --- a/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java +++ b/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java @@ -103,7 +103,7 @@ public void testUnassignedShardAndEmptyNodesInRoutingTable() throws Exception { .nodes(DiscoveryNodes.EMPTY_NODES) .build(), false ); - RoutingAllocation routingAllocation = new RoutingAllocation(allocationDeciders, routingNodes, current, ClusterInfo.EMPTY, System.nanoTime()); + RoutingAllocation routingAllocation = new RoutingAllocation(allocationDeciders, routingNodes, current, ClusterInfo.EMPTY, System.nanoTime(), false); allocator.allocateUnassigned(routingAllocation); } diff --git a/docs/reference/cluster/reroute.asciidoc b/docs/reference/cluster/reroute.asciidoc index 99e754df52931..ba9424e731ef1 100644 --- a/docs/reference/cluster/reroute.asciidoc +++ b/docs/reference/cluster/reroute.asciidoc @@ -103,3 +103,15 @@ are available: To ensure that these implications are well-understood, this command requires the special field `accept_data_loss` to be explicitly set to `true` for it to work. + +=== Retry failed shards + +In the case when the allocation of a shard failed multiple times without a +successful allocation, the cluster tries to prevent infinite allocation loops. +To manually retry shards that have failed too often the `_reroute` API can be +called with `?retry_failed=true` to execute a single retry allocation round for +those shards. +In order to identify shards that exceeded the maximum number of retries, run the `_reroute` API +with `?explain=true` and check for the `max_retry` allocation decider. +The maximum number of retries can be configured per index by setting `index.allocation.max_retries` +to a value greater than `0`. The default is `5` diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json index 2ae42c089d318..8bb85ca087ae8 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json @@ -16,6 +16,10 @@ "type" : "boolean", "description" : "Return an explanation of why the commands can or cannot be executed" }, + "retry_failed": { + "type" : "boolean", + "description" : "Retries allocation of shards that are blocked due to too many subsequent allocation failures" + }, "metric": { "type": "list", "options": ["_all", "blocks", "metadata", "nodes", "routing_table", "master_node", "version"], From 68ad1e0d1340239a367955933c7569cad6850dc4 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2016 11:02:22 +0200 Subject: [PATCH 09/12] fix check condition --- .../decider/MaxRetryAllocationDecider.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java index 31b8ea940e2be..6a8a0ccc5fab3 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -57,19 +57,17 @@ public MaxRetryAllocationDecider(Settings settings) { @Override public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); - if (unassignedInfo != null && unassignedInfo.getNumFailedAllocations() > 0 - || allocation.isRetryFailed() // if we are called via the _reroute API we ignore the failure counter and try to allocate - // this improves the usability since people don't need to raise the limits to issue retries since a simple _reroute call is - // enough to manually retry. - ) { + if (unassignedInfo != null && unassignedInfo.getNumFailedAllocations() > 0) { final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index()); final int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexMetaData.getSettings()); - if (allocation.isRetryFailed()) { //manual allocation - retry + if (allocation.isRetryFailed()) { // manual allocation - retry + // if we are called via the _reroute API we ignore the failure counter and try to allocate + // this improves the usability since people don't need to raise the limits to issue retries since a simple _reroute call is + // enough to manually retry. return allocation.decision(Decision.YES, NAME, "shard has already failed allocating [" + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + unassignedInfo.toString() + " - retrying once on manual allocation"); - } - if (unassignedInfo.getNumFailedAllocations() >= maxRetry) { + } else if (unassignedInfo.getNumFailedAllocations() >= maxRetry) { return allocation.decision(Decision.NO, NAME, "shard has already failed allocating [" + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + unassignedInfo.toString() + " - manually call [/_cluster/reroute?retry_failed=true] to retry"); From 36aeb557d5e6ff57d976cd874ecf7c4fb0115bc4 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2016 12:14:40 +0200 Subject: [PATCH 10/12] feedback from @ywelsch --- .../action/admin/cluster/reroute/ClusterRerouteRequest.java | 2 +- .../admin/cluster/reroute/ClusterRerouteRequestBuilder.java | 2 +- .../admin/cluster/reroute/TransportClusterRerouteAction.java | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java index 2a38315f635ea..98cd8bee7abec 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java @@ -84,7 +84,7 @@ public ClusterRerouteRequest explain(boolean explain) { /** * Sets the retry failed flag (defaults to false). If true, the - * request will retry allocating shards that are currently can't be allocated due to too many allocation failures. + * request will retry allocating shards that can't currently be allocated due to too many allocation failures. */ public ClusterRerouteRequest setRetryFailed(boolean retryFailed) { this.retryFailed = retryFailed; diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java index 2ae0570a6b8d8..e5d130cf8125c 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequestBuilder.java @@ -62,7 +62,7 @@ public ClusterRerouteRequestBuilder setExplain(boolean explain) { /** * Sets the retry failed flag (defaults to false). If true, the - * request will retry allocating shards that are currently can't be allocated due to too many allocation failures. + * request will retry allocating shards that can't currently be allocated due to too many allocation failures. */ public ClusterRerouteRequestBuilder setRetryFailed(boolean retryFailed) { request.setRetryFailed(retryFailed); diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java index 2a834c3fcfae8..e9bbec3f8817b 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java @@ -91,7 +91,8 @@ public void onFailure(String source, Throwable t) { @Override public ClusterState execute(ClusterState currentState) { - RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.commands, request.explain(), true); + RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.commands, request.explain(), + request.isRetryFailed()); ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); clusterStateToSend = newState; explanations = routingResult.explanations(); From fd18eebb455d2573519dd9b53c4e1175a1b025a2 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2016 19:33:57 +0200 Subject: [PATCH 11/12] fix docs --- docs/reference/cluster/reroute.asciidoc | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/reference/cluster/reroute.asciidoc b/docs/reference/cluster/reroute.asciidoc index ba9424e731ef1..bb48a00fbe53d 100644 --- a/docs/reference/cluster/reroute.asciidoc +++ b/docs/reference/cluster/reroute.asciidoc @@ -104,14 +104,15 @@ are available: this command requires the special field `accept_data_loss` to be explicitly set to `true` for it to work. +[float] === Retry failed shards -In the case when the allocation of a shard failed multiple times without a -successful allocation, the cluster tries to prevent infinite allocation loops. -To manually retry shards that have failed too often the `_reroute` API can be -called with `?retry_failed=true` to execute a single retry allocation round for -those shards. -In order to identify shards that exceeded the maximum number of retries, run the `_reroute` API -with `?explain=true` and check for the `max_retry` allocation decider. -The maximum number of retries can be configured per index by setting `index.allocation.max_retries` -to a value greater than `0`. The default is `5` +The cluster will attempt to allocate a shard a maximum of +`index.allocation.max_retries` times in a row (defaults to `5`), before giving +up and leaving the shard unallocated. This scenario can be caused by +structural problems such as having an analyzer which refers to a stopwords +file which doesn't exist on all nodes. + +Once the problem has been corrected, allocation can be manually retried by +calling the <> API with `?retry_failed`, which +will attempt a single retry round for these shards. \ No newline at end of file From 6ff68228678868d97b046e28f20e5f50b0d7b1c0 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2016 20:26:43 +0200 Subject: [PATCH 12/12] add unittests for ClusterReroute --- .../reroute/ClusterRerouteRequest.java | 17 +- .../TransportClusterRerouteAction.java | 73 ++++--- .../cluster/reroute/ClusterRerouteTests.java | 181 ++++++++++++++++++ 3 files changed, 239 insertions(+), 32 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java index 98cd8bee7abec..dcc45ab21b961 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java @@ -38,10 +38,10 @@ * Request to submit cluster reroute allocation commands */ public class ClusterRerouteRequest extends AcknowledgedRequest { - AllocationCommands commands = new AllocationCommands(); - boolean dryRun; - boolean explain; - boolean retryFailed; + private AllocationCommands commands = new AllocationCommands(); + private boolean dryRun; + private boolean explain; + private boolean retryFailed; public ClusterRerouteRequest() { } @@ -114,6 +114,13 @@ public ClusterRerouteRequest commands(AllocationCommand... commands) { return this; } + /** + * Returns the allocation commands to execute + */ + public AllocationCommands getCommands() { + return commands; + } + /** * Sets the source for the request. */ @@ -154,6 +161,7 @@ public void readFrom(StreamInput in) throws IOException { commands = AllocationCommands.readFrom(in); dryRun = in.readBoolean(); explain = in.readBoolean(); + retryFailed = in.readBoolean(); readTimeout(in); } @@ -163,6 +171,7 @@ public void writeTo(StreamOutput out) throws IOException { AllocationCommands.writeTo(commands, out); out.writeBoolean(dryRun); out.writeBoolean(explain); + out.writeBoolean(retryFailed); writeTimeout(out); } } diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java index e9bbec3f8817b..b0b676f6e2e50 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java @@ -33,6 +33,7 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -68,39 +69,55 @@ protected ClusterRerouteResponse newResponse() { @Override protected void masterOperation(final ClusterRerouteRequest request, final ClusterState state, final ActionListener listener) { - clusterService.submitStateUpdateTask("cluster_reroute (api)", new AckedClusterStateUpdateTask(Priority.IMMEDIATE, request, listener) { + clusterService.submitStateUpdateTask("cluster_reroute (api)", new ClusterRerouteResponseAckedClusterStateUpdateTask(logger, + allocationService, request, listener)); + } - private volatile ClusterState clusterStateToSend; - private volatile RoutingExplanations explanations; + static class ClusterRerouteResponseAckedClusterStateUpdateTask extends AckedClusterStateUpdateTask { - @Override - protected ClusterRerouteResponse newResponse(boolean acknowledged) { - return new ClusterRerouteResponse(acknowledged, clusterStateToSend, explanations); - } + private final ClusterRerouteRequest request; + private final ActionListener listener; + private final ESLogger logger; + private final AllocationService allocationService; + private volatile ClusterState clusterStateToSend; + private volatile RoutingExplanations explanations; - @Override - public void onAckTimeout() { - listener.onResponse(new ClusterRerouteResponse(false, clusterStateToSend, new RoutingExplanations())); - } + ClusterRerouteResponseAckedClusterStateUpdateTask(ESLogger logger, AllocationService allocationService, ClusterRerouteRequest request, + ActionListener listener) { + super(Priority.IMMEDIATE, request, listener); + this.request = request; + this.listener = listener; + this.logger = logger; + this.allocationService = allocationService; + } - @Override - public void onFailure(String source, Throwable t) { - logger.debug("failed to perform [{}]", t, source); - super.onFailure(source, t); - } + @Override + protected ClusterRerouteResponse newResponse(boolean acknowledged) { + return new ClusterRerouteResponse(acknowledged, clusterStateToSend, explanations); + } + + @Override + public void onAckTimeout() { + listener.onResponse(new ClusterRerouteResponse(false, clusterStateToSend, new RoutingExplanations())); + } + + @Override + public void onFailure(String source, Throwable t) { + logger.debug("failed to perform [{}]", t, source); + super.onFailure(source, t); + } - @Override - public ClusterState execute(ClusterState currentState) { - RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.commands, request.explain(), - request.isRetryFailed()); - ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); - clusterStateToSend = newState; - explanations = routingResult.explanations(); - if (request.dryRun) { - return currentState; - } - return newState; + @Override + public ClusterState execute(ClusterState currentState) { + RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.getCommands(), request.explain(), + request.isRetryFailed()); + ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); + clusterStateToSend = newState; + explanations = routingResult.explanations(); + if (request.dryRun()) { + return currentState; } - }); + return newState; + } } } diff --git a/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java b/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java new file mode 100644 index 0000000000000..927f572487ce5 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java @@ -0,0 +1,181 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.action.admin.cluster.reroute; + +import org.elasticsearch.Version; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.network.NetworkModule; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.test.ESAllocationTestCase; +import org.elasticsearch.test.gateway.NoopGatewayAllocator; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; +import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; + +public class ClusterRerouteTests extends ESAllocationTestCase { + + public void testSerializeRequest() throws IOException { + ClusterRerouteRequest req = new ClusterRerouteRequest(); + req.setRetryFailed(randomBoolean()); + req.dryRun(randomBoolean()); + req.explain(randomBoolean()); + req.commands(new AllocateEmptyPrimaryAllocationCommand("foo", 1, "bar", randomBoolean())); + req.timeout(TimeValue.timeValueMillis(randomIntBetween(0, 100))); + BytesStreamOutput out = new BytesStreamOutput(); + req.writeTo(out); + BytesReference bytes = out.bytes(); + NamedWriteableRegistry namedWriteableRegistry = new NamedWriteableRegistry(); + new NetworkModule(null, Settings.EMPTY, true, namedWriteableRegistry); + StreamInput wrap = new NamedWriteableAwareStreamInput(StreamInput.wrap(bytes.toBytes()), + namedWriteableRegistry); + ClusterRerouteRequest deserializedReq = new ClusterRerouteRequest(); + deserializedReq.readFrom(wrap); + + assertEquals(req.isRetryFailed(), deserializedReq.isRetryFailed()); + assertEquals(req.dryRun(), deserializedReq.dryRun()); + assertEquals(req.explain(), deserializedReq.explain()); + assertEquals(req.timeout(), deserializedReq.timeout()); + assertEquals(1, deserializedReq.getCommands().commands().size()); // allocation commands have their own tests + assertEquals(req.getCommands().commands().size(), deserializedReq.getCommands().commands().size()); + } + + public void testClusterStateUpdateTask() { + AllocationService allocationService = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, + Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), + NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); + ClusterState clusterState = createInitialClusterState(allocationService); + ClusterRerouteRequest req = new ClusterRerouteRequest(); + req.dryRun(true); + AtomicReference responseRef = new AtomicReference<>(); + ActionListener responseActionListener = new ActionListener() { + @Override + public void onResponse(ClusterRerouteResponse clusterRerouteResponse) { + responseRef.set(clusterRerouteResponse); + } + + @Override + public void onFailure(Throwable e) { + + } + }; + TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task = + new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(logger, allocationService, req, + responseActionListener ); + ClusterState execute = task.execute(clusterState); + assertSame(execute, clusterState); // dry-run + task.onAllNodesAcked(null); + assertNotSame(responseRef.get().getState(), execute); + + req.dryRun(false);// now we allocate + + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries; i++) { + ClusterState newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + clusterState = newState; + RoutingTable routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i); + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = allocationService.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + clusterState = ClusterState.builder(clusterState).routingTable(result.routingTable()).build(); + routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + if (i == retries-1) { + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + } else { + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + } + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + } + + + // without retry_failed we won't allocate that shard + ClusterState newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + task.onAllNodesAcked(null); + assertSame(responseRef.get().getState(), newState); + RoutingTable routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + + req.setRetryFailed(true); // now we manually retry and get the shard back into initializing + newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + clusterState = newState; + routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + } + + private ClusterState createInitialClusterState(AllocationService service) { + MetaData.Builder metaBuilder = MetaData.builder(); + metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); + MetaData metaData = metaBuilder.build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + routingTableBuilder.addAsNew(metaData.index("idx")); + + RoutingTable routingTable = routingTableBuilder.build(); + ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) + .metaData(metaData).routingTable(routingTable).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) + .build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = service.reroute(clusterState, "reroute").routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + assertEquals(prevRoutingTable.index("idx").shards().size(), 1); + assertEquals(prevRoutingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + return clusterState; + } +}