From 9093c75d2af8b55c62921c93d04771bcc0005951 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 4 Dec 2018 12:59:08 +0000 Subject: [PATCH 01/15] [Zen2] Best-effort cluster formation if unconfigured In real deployments it is important that clusters are properly configured to avoid accidentally forming multiple independent clusters at cluster bootstrapping time. However we also expect to be able to unpack Elasticsearch and start up one or more nodes without any up-front configuration, and have them do their best to find each other and form a cluster after a few seconds. This change adds a delayed automatic bootstrapping process to nodes that start up with no relevant settings set to support the desired out-of-the-box experience without compromising safety in properly-configured deployments. --- .../coordination/ClusterBootstrapService.java | 61 ++++++++++++++++++- .../common/settings/ClusterSettings.java | 1 + .../ClusterBootstrapServiceTests.java | 55 +++++++++++++++-- 3 files changed, 112 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index 093d3a2bf0e60..bf6c3ddf98372 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -44,6 +44,10 @@ import java.util.Collections; import java.util.List; import java.util.function.Function; +import java.util.stream.Stream; + +import static org.elasticsearch.discovery.DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING; +import static org.elasticsearch.discovery.zen.SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING; public class ClusterBootstrapService { @@ -57,14 +61,22 @@ public class ClusterBootstrapService { public static final Setting> INITIAL_MASTER_NODES_SETTING = Setting.listSetting("cluster.initial_master_nodes", Collections.emptyList(), Function.identity(), Property.NodeScope); + public static final Setting UNCONFIGURED_DISCOVERY_TIMEOUT_SETTING = + Setting.timeSetting("cluster.unconfigured_discovery_timeout", + TimeValue.timeValueSeconds(3), TimeValue.timeValueMillis(1), Property.NodeScope); + private final int initialMasterNodeCount; private final List initialMasterNodes; + private final TimeValue unconfiguredDiscoveryTimeout; private final TransportService transportService; private volatile boolean running; public ClusterBootstrapService(Settings settings, TransportService transportService) { initialMasterNodeCount = INITIAL_MASTER_NODE_COUNT_SETTING.get(settings); initialMasterNodes = INITIAL_MASTER_NODES_SETTING.get(settings); + final boolean isConfigured = Stream.of(DISCOVERY_HOSTS_PROVIDER_SETTING, DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, + INITIAL_MASTER_NODE_COUNT_SETTING, INITIAL_MASTER_NODES_SETTING).anyMatch(s -> s.exists(settings)); + unconfiguredDiscoveryTimeout = isConfigured ? TimeValue.MINUS_ONE : UNCONFIGURED_DISCOVERY_TIMEOUT_SETTING.get(settings); this.transportService = transportService; } @@ -72,7 +84,54 @@ public void start() { assert running == false; running = true; - if (initialMasterNodeCount > 0 && transportService.getLocalNode().isMasterNode()) { + if (transportService.getLocalNode().isMasterNode() == false) { + return; + } + + if (unconfiguredDiscoveryTimeout.compareTo(TimeValue.ZERO) > 0) { + logger.debug("discovery not configured, performing best-effort cluster formation after [{}]", unconfiguredDiscoveryTimeout); + final ThreadContext threadContext = transportService.getThreadPool().getThreadContext(); + try (ThreadContext.StoredContext ignore = threadContext.stashContext()) { + threadContext.markAsSystemContext(); + + transportService.getThreadPool().scheduleUnlessShuttingDown(unconfiguredDiscoveryTimeout, Names.SAME, new Runnable() { + @Override + public void run() { + final GetDiscoveredNodesRequest request = new GetDiscoveredNodesRequest(); + logger.trace("sending {}", request); + transportService.sendRequest(transportService.getLocalNode(), GetDiscoveredNodesAction.NAME, request, + new TransportResponseHandler() { + @Override + public void handleResponse(GetDiscoveredNodesResponse response) { + logger.debug("discovered {}, starting to bootstrap", response.getNodes()); + awaitBootstrap(response.getBootstrapConfiguration()); + } + + @Override + public void handleException(TransportException exp) { + logger.warn("discovery attempt failed", exp); + } + + @Override + public String executor() { + return Names.SAME; + } + + @Override + public GetDiscoveredNodesResponse read(StreamInput in) throws IOException { + return new GetDiscoveredNodesResponse(in); + } + }); + } + + @Override + public String toString() { + return "development-mode delayed bootstrap"; + } + }); + + } + } else if (initialMasterNodeCount > 0) { logger.debug("unsafely waiting for discovery of [{}] master-eligible nodes", initialMasterNodeCount); final ThreadContext threadContext = transportService.getThreadPool().getThreadContext(); diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 0a6335ebc4952..9bde10fa5572b 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -472,6 +472,7 @@ public void apply(Settings value, Settings current, Settings previous) { TransportAddVotingTombstonesAction.MAXIMUM_VOTING_TOMBSTONES_SETTING, ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING, ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING, + ClusterBootstrapService.UNCONFIGURED_DISCOVERY_TIMEOUT_SETTING, LagDetector.CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING ))); diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java index 952d7c0e7528a..1ddcf01916187 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java @@ -23,12 +23,14 @@ import org.elasticsearch.action.admin.cluster.bootstrap.BootstrapClusterAction; import org.elasticsearch.action.admin.cluster.bootstrap.BootstrapClusterRequest; import org.elasticsearch.action.admin.cluster.bootstrap.BootstrapClusterResponse; +import org.elasticsearch.action.admin.cluster.bootstrap.BootstrapConfiguration.NodeDescription; import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesAction; import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesRequest; import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesResponse; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNode.Role; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.settings.Settings.Builder; import org.elasticsearch.tasks.Task; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.transport.MockTransport; @@ -48,8 +50,13 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; import static java.util.Collections.singleton; +import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING; import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING; +import static org.elasticsearch.common.settings.Settings.builder; +import static org.elasticsearch.discovery.DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING; +import static org.elasticsearch.discovery.zen.SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; +import static org.hamcrest.Matchers.equalTo; public class ClusterBootstrapServiceTests extends ESTestCase { @@ -64,7 +71,7 @@ public void createServices() { otherNode1 = newDiscoveryNode("other1"); otherNode2 = newDiscoveryNode("other2"); - deterministicTaskQueue = new DeterministicTaskQueue(Settings.builder().put(NODE_NAME_SETTING.getKey(), "node").build(), random()); + deterministicTaskQueue = new DeterministicTaskQueue(builder().put(NODE_NAME_SETTING.getKey(), "node").build(), random()); final MockTransport transport = new MockTransport() { @Override @@ -76,7 +83,7 @@ protected void onSendRequest(long requestId, String action, TransportRequest req transportService = transport.createTransportService(Settings.EMPTY, deterministicTaskQueue.getThreadPool(), TransportService.NOOP_TRANSPORT_INTERCEPTOR, boundTransportAddress -> localNode, null, emptySet()); - clusterBootstrapService = new ClusterBootstrapService(Settings.builder().put(INITIAL_MASTER_NODE_COUNT_SETTING.getKey(), 3).build(), + clusterBootstrapService = new ClusterBootstrapService(builder().put(INITIAL_MASTER_NODE_COUNT_SETTING.getKey(), 3).build(), transportService); } @@ -102,8 +109,24 @@ public void testDoesNothingOnNonMasterNodes() { deterministicTaskQueue.runAllTasks(); } - public void testDoesNothingIfSettingIsUnset() { - clusterBootstrapService = new ClusterBootstrapService(Settings.EMPTY, transportService); + public void testDoesNothingByDefaultIfHostsProviderConfigured() { + testConfiguredIfSettingSet(builder().putList(DISCOVERY_HOSTS_PROVIDER_SETTING.getKey())); + } + + public void testDoesNothingByDefaultIfUnicastHostsConfigured() { + testConfiguredIfSettingSet(builder().putList(DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING.getKey())); + } + + public void testDoesNothingByDefaultIfMasterNodeCountConfigured() { + testConfiguredIfSettingSet(builder().put(INITIAL_MASTER_NODE_COUNT_SETTING.getKey(), 0)); + } + + public void testDoesNothingByDefaultIfMasterNodesConfigured() { + testConfiguredIfSettingSet(builder().putList(INITIAL_MASTER_NODES_SETTING.getKey())); + } + + private void testConfiguredIfSettingSet(Builder builder) { + clusterBootstrapService = new ClusterBootstrapService(builder.build(), transportService); transportService.registerRequestHandler(GetDiscoveredNodesAction.NAME, Names.SAME, GetDiscoveredNodesRequest::new, (request, channel, task) -> { throw new AssertionError("should not make a discovery request"); @@ -112,6 +135,30 @@ public void testDoesNothingIfSettingIsUnset() { deterministicTaskQueue.runAllTasks(); } + public void testBootstrapsByDefaultInDevelopmentMode() { + clusterBootstrapService = new ClusterBootstrapService(Settings.EMPTY, transportService); + + final Set discoveredNodes = Stream.of(localNode, otherNode1, otherNode2).collect(Collectors.toSet()); + transportService.registerRequestHandler(GetDiscoveredNodesAction.NAME, Names.SAME, GetDiscoveredNodesRequest::new, + (request, channel, task) -> channel.sendResponse(new GetDiscoveredNodesResponse(discoveredNodes))); + + final AtomicBoolean bootstrapped = new AtomicBoolean(); + transportService.registerRequestHandler(BootstrapClusterAction.NAME, Names.SAME, BootstrapClusterRequest::new, + (request, channel, task) -> { + assertThat(request.getBootstrapConfiguration().getNodeDescriptions().stream() + .map(NodeDescription::getId).collect(Collectors.toSet()), + equalTo(discoveredNodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet()))); + + channel.sendResponse(new BootstrapClusterResponse(randomBoolean())); + assertTrue(bootstrapped.compareAndSet(false, true)); + }); + + startServices(); + deterministicTaskQueue.runAllTasks(); + + assertTrue(bootstrapped.get()); + } + public void testDoesNotRetryOnDiscoveryFailure() { transportService.registerRequestHandler(GetDiscoveredNodesAction.NAME, Names.SAME, GetDiscoveredNodesRequest::new, new TransportRequestHandler() { From 75c7fee83baaf7ac0edef5f64490e9236a435ccc Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 4 Dec 2018 14:26:15 +0000 Subject: [PATCH 02/15] Nullable rather than -1 --- .../cluster/coordination/ClusterBootstrapService.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index bf6c3ddf98372..ebf0bbe0777ae 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -29,6 +29,7 @@ import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesRequest; import org.elasticsearch.action.admin.cluster.bootstrap.GetDiscoveredNodesResponse; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; @@ -67,6 +68,7 @@ public class ClusterBootstrapService { private final int initialMasterNodeCount; private final List initialMasterNodes; + @Nullable private final TimeValue unconfiguredDiscoveryTimeout; private final TransportService transportService; private volatile boolean running; @@ -76,7 +78,7 @@ public ClusterBootstrapService(Settings settings, TransportService transportServ initialMasterNodes = INITIAL_MASTER_NODES_SETTING.get(settings); final boolean isConfigured = Stream.of(DISCOVERY_HOSTS_PROVIDER_SETTING, DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, INITIAL_MASTER_NODE_COUNT_SETTING, INITIAL_MASTER_NODES_SETTING).anyMatch(s -> s.exists(settings)); - unconfiguredDiscoveryTimeout = isConfigured ? TimeValue.MINUS_ONE : UNCONFIGURED_DISCOVERY_TIMEOUT_SETTING.get(settings); + unconfiguredDiscoveryTimeout = isConfigured ? null : UNCONFIGURED_DISCOVERY_TIMEOUT_SETTING.get(settings); this.transportService = transportService; } @@ -88,7 +90,7 @@ public void start() { return; } - if (unconfiguredDiscoveryTimeout.compareTo(TimeValue.ZERO) > 0) { + if (unconfiguredDiscoveryTimeout != null) { logger.debug("discovery not configured, performing best-effort cluster formation after [{}]", unconfiguredDiscoveryTimeout); final ThreadContext threadContext = transportService.getThreadPool().getThreadContext(); try (ThreadContext.StoredContext ignore = threadContext.stashContext()) { From e7198d7389bd862ce2b352e32d2f3376c4c4df60 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 5 Dec 2018 08:15:13 +0000 Subject: [PATCH 03/15] Rename setting --- .../cluster/coordination/ClusterBootstrapService.java | 6 +++--- .../org/elasticsearch/common/settings/ClusterSettings.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index ebf0bbe0777ae..2e67f53c86ced 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -62,8 +62,8 @@ public class ClusterBootstrapService { public static final Setting> INITIAL_MASTER_NODES_SETTING = Setting.listSetting("cluster.initial_master_nodes", Collections.emptyList(), Function.identity(), Property.NodeScope); - public static final Setting UNCONFIGURED_DISCOVERY_TIMEOUT_SETTING = - Setting.timeSetting("cluster.unconfigured_discovery_timeout", + public static final Setting UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING = + Setting.timeSetting("discovery.unconfigured_bootstrap_timeout", TimeValue.timeValueSeconds(3), TimeValue.timeValueMillis(1), Property.NodeScope); private final int initialMasterNodeCount; @@ -78,7 +78,7 @@ public ClusterBootstrapService(Settings settings, TransportService transportServ initialMasterNodes = INITIAL_MASTER_NODES_SETTING.get(settings); final boolean isConfigured = Stream.of(DISCOVERY_HOSTS_PROVIDER_SETTING, DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, INITIAL_MASTER_NODE_COUNT_SETTING, INITIAL_MASTER_NODES_SETTING).anyMatch(s -> s.exists(settings)); - unconfiguredDiscoveryTimeout = isConfigured ? null : UNCONFIGURED_DISCOVERY_TIMEOUT_SETTING.get(settings); + unconfiguredDiscoveryTimeout = isConfigured ? null : UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING.get(settings); this.transportService = transportService; } diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 9bde10fa5572b..e7f5c461c6985 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -472,7 +472,7 @@ public void apply(Settings value, Settings current, Settings previous) { TransportAddVotingTombstonesAction.MAXIMUM_VOTING_TOMBSTONES_SETTING, ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING, ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING, - ClusterBootstrapService.UNCONFIGURED_DISCOVERY_TIMEOUT_SETTING, + ClusterBootstrapService.UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING, LagDetector.CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING ))); From 774dd77f445014e6c1ebdc1a45ca6f7e8e9a2735 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 5 Dec 2018 08:16:22 +0000 Subject: [PATCH 04/15] Log at INFO --- .../cluster/coordination/ClusterBootstrapService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index 2e67f53c86ced..10972faf673ab 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -91,7 +91,7 @@ public void start() { } if (unconfiguredDiscoveryTimeout != null) { - logger.debug("discovery not configured, performing best-effort cluster formation after [{}]", unconfiguredDiscoveryTimeout); + logger.info("discovery not configured, will perform best-effort cluster bootstrapping after [{}]", unconfiguredDiscoveryTimeout); final ThreadContext threadContext = transportService.getThreadPool().getThreadContext(); try (ThreadContext.StoredContext ignore = threadContext.stashContext()) { threadContext.markAsSystemContext(); From 7d2706523c11238c4c5d1db204ff0bfc8b07f9c4 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 5 Dec 2018 08:19:14 +0000 Subject: [PATCH 05/15] Moar rename --- .../cluster/coordination/ClusterBootstrapService.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index 10972faf673ab..41b9bf49c319b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -69,7 +69,7 @@ public class ClusterBootstrapService { private final int initialMasterNodeCount; private final List initialMasterNodes; @Nullable - private final TimeValue unconfiguredDiscoveryTimeout; + private final TimeValue unconfiguredBootstrapTimeout; private final TransportService transportService; private volatile boolean running; @@ -78,7 +78,7 @@ public ClusterBootstrapService(Settings settings, TransportService transportServ initialMasterNodes = INITIAL_MASTER_NODES_SETTING.get(settings); final boolean isConfigured = Stream.of(DISCOVERY_HOSTS_PROVIDER_SETTING, DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, INITIAL_MASTER_NODE_COUNT_SETTING, INITIAL_MASTER_NODES_SETTING).anyMatch(s -> s.exists(settings)); - unconfiguredDiscoveryTimeout = isConfigured ? null : UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING.get(settings); + unconfiguredBootstrapTimeout = isConfigured ? null : UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING.get(settings); this.transportService = transportService; } @@ -90,13 +90,14 @@ public void start() { return; } - if (unconfiguredDiscoveryTimeout != null) { - logger.info("discovery not configured, will perform best-effort cluster bootstrapping after [{}]", unconfiguredDiscoveryTimeout); + if (unconfiguredBootstrapTimeout != null) { + logger.info("discovery not configured, will perform best-effort cluster bootstrapping after [{}]", + unconfiguredBootstrapTimeout); final ThreadContext threadContext = transportService.getThreadPool().getThreadContext(); try (ThreadContext.StoredContext ignore = threadContext.stashContext()) { threadContext.markAsSystemContext(); - transportService.getThreadPool().scheduleUnlessShuttingDown(unconfiguredDiscoveryTimeout, Names.SAME, new Runnable() { + transportService.getThreadPool().scheduleUnlessShuttingDown(unconfiguredBootstrapTimeout, Names.SAME, new Runnable() { @Override public void run() { final GetDiscoveredNodesRequest request = new GetDiscoveredNodesRequest(); From 45528da79f355239eb1b92bfe94b93f084a20f1b Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 5 Dec 2018 08:20:35 +0000 Subject: [PATCH 06/15] rename runnable --- .../cluster/coordination/ClusterBootstrapService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index 41b9bf49c319b..80b8181fae776 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -129,7 +129,7 @@ public GetDiscoveredNodesResponse read(StreamInput in) throws IOException { @Override public String toString() { - return "development-mode delayed bootstrap"; + return "unconfigured-discovery delayed bootstrap"; } }); From 02f5b28bfbf896bee2ae230b962256ad01f0183e Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 5 Dec 2018 08:20:39 +0000 Subject: [PATCH 07/15] Rename test --- .../cluster/coordination/ClusterBootstrapServiceTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java index 1ddcf01916187..9377b05645366 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterBootstrapServiceTests.java @@ -135,7 +135,7 @@ private void testConfiguredIfSettingSet(Builder builder) { deterministicTaskQueue.runAllTasks(); } - public void testBootstrapsByDefaultInDevelopmentMode() { + public void testBootstrapsAutomaticallyWithDefaultConfiguration() { clusterBootstrapService = new ClusterBootstrapService(Settings.EMPTY, transportService); final Set discoveredNodes = Stream.of(localNode, otherNode1, otherNode2).collect(Collectors.toSet()); From ccc124d593288607db51ae012608360a27daa69e Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 5 Dec 2018 11:29:05 +0000 Subject: [PATCH 08/15] better message --- .../cluster/coordination/ClusterBootstrapService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index 80b8181fae776..08e232010f3a2 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -91,8 +91,8 @@ public void start() { } if (unconfiguredBootstrapTimeout != null) { - logger.info("discovery not configured, will perform best-effort cluster bootstrapping after [{}]", - unconfiguredBootstrapTimeout); + logger.info("no discovery configuration found, will perform best-effort cluster bootstrapping after [{}] " + + "unless existing master is discovered", unconfiguredBootstrapTimeout); final ThreadContext threadContext = transportService.getThreadPool().getThreadContext(); try (ThreadContext.StoredContext ignore = threadContext.stashContext()) { threadContext.markAsSystemContext(); From 5cd1d0691227f690aba9a7d6c311b30112c66f5f Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 5 Dec 2018 12:08:46 +0000 Subject: [PATCH 09/15] Add bootstrap check that discovery is configured --- .../bootstrap/BootstrapChecks.java | 11 ++++++++++ .../coordination/ClusterBootstrapService.java | 9 +++++--- .../bootstrap/BootstrapChecksTests.java | 21 +++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java index 0c433192ad6c0..dcf787344720c 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java @@ -23,6 +23,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.util.Constants; +import org.elasticsearch.cluster.coordination.ClusterBootstrapService; import org.elasticsearch.common.SuppressForbidden; import org.elasticsearch.common.io.PathUtils; import org.elasticsearch.common.transport.BoundTransportAddress; @@ -207,6 +208,7 @@ static List checks() { checks.add(new EarlyAccessCheck()); checks.add(new G1GCCheck()); checks.add(new AllPermissionCheck()); + checks.add(new DiscoveryConfiguredCheck()); return Collections.unmodifiableList(checks); } @@ -713,4 +715,13 @@ boolean isAllPermissionGranted() { } + static class DiscoveryConfiguredCheck implements BootstrapCheck { + @Override + public BootstrapCheckResult check(BootstrapContext context) { + if (ClusterBootstrapService.discoveryIsConfigured(context.settings)) { + return BootstrapCheckResult.success(); + } + return BootstrapCheckResult.failure("the default discovery settings are unsuitable for production use"); + } + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index 08e232010f3a2..51206ff563e4b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -76,12 +76,15 @@ public class ClusterBootstrapService { public ClusterBootstrapService(Settings settings, TransportService transportService) { initialMasterNodeCount = INITIAL_MASTER_NODE_COUNT_SETTING.get(settings); initialMasterNodes = INITIAL_MASTER_NODES_SETTING.get(settings); - final boolean isConfigured = Stream.of(DISCOVERY_HOSTS_PROVIDER_SETTING, DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, - INITIAL_MASTER_NODE_COUNT_SETTING, INITIAL_MASTER_NODES_SETTING).anyMatch(s -> s.exists(settings)); - unconfiguredBootstrapTimeout = isConfigured ? null : UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING.get(settings); + unconfiguredBootstrapTimeout = discoveryIsConfigured(settings) ? null : UNCONFIGURED_BOOTSTRAP_TIMEOUT_SETTING.get(settings); this.transportService = transportService; } + public static boolean discoveryIsConfigured(Settings settings) { + return Stream.of(DISCOVERY_HOSTS_PROVIDER_SETTING, DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, + INITIAL_MASTER_NODE_COUNT_SETTING, INITIAL_MASTER_NODES_SETTING).anyMatch(s -> s.exists(settings)); + } + public void start() { assert running == false; running = true; diff --git a/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java b/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java index 1e18135f4eb72..1bac5abda7c8b 100644 --- a/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java +++ b/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java @@ -21,10 +21,14 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.util.Constants; +import org.elasticsearch.cluster.coordination.ClusterBootstrapService; import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.BoundTransportAddress; import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.discovery.DiscoveryModule; +import org.elasticsearch.discovery.zen.SettingsBasedHostsProvider; import org.elasticsearch.monitor.jvm.JvmInfo; import org.elasticsearch.node.NodeValidationException; import org.elasticsearch.test.ESTestCase; @@ -698,4 +702,21 @@ public boolean alwaysEnforce() { assertThat(alwaysEnforced, hasToString(containsString("error"))); } + public void testDiscoveryConfiguredCheck() throws NodeValidationException { + final List checks = Collections.singletonList(new BootstrapChecks.DiscoveryConfiguredCheck()); + final NodeValidationException e = expectThrows( + NodeValidationException.class, + () -> BootstrapChecks.check(defaultContext, true, checks)); + assertThat(e, hasToString(containsString("the default discovery settings are unsuitable for production use"))); + + BootstrapChecks.check(defaultContext, false, checks); // not always enforced + + CheckedConsumer ensureChecksPass = b -> + BootstrapChecks.check(new BootstrapContext(b.build(), MetaData.EMPTY_META_DATA), true, checks); + + ensureChecksPass.accept(Settings.builder().putList(DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING.getKey())); + ensureChecksPass.accept(Settings.builder().putList(SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING.getKey())); + ensureChecksPass.accept(Settings.builder().put(ClusterBootstrapService.INITIAL_MASTER_NODE_COUNT_SETTING.getKey(), 0)); + ensureChecksPass.accept(Settings.builder().putList(ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING.getKey())); + } } From 1ea77f4a5cabab5339362a7f88be9cbee04d621f Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 5 Dec 2018 12:25:50 +0000 Subject: [PATCH 10/15] Add docs for bootstrap check --- docs/reference/setup/bootstrap-checks.asciidoc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/reference/setup/bootstrap-checks.asciidoc b/docs/reference/setup/bootstrap-checks.asciidoc index 03f98fd38acf0..52a1ab38ebd74 100644 --- a/docs/reference/setup/bootstrap-checks.asciidoc +++ b/docs/reference/setup/bootstrap-checks.asciidoc @@ -236,3 +236,21 @@ versions of the HotSpot JVM. The all permission check ensures that the security policy used during bootstrap does not grant the `java.security.AllPermission` to Elasticsearch. Running with the all permission granted is equivalent to disabling the security manager. + +=== Discovery configuration check + +By default, when Elasticsearch first starts up it will try and discover other +nodes running on the same host. If no elected master can be discovered within a +few seconds then Elasticsearch will form a cluster that includes any other +nodes that were discovered. It is useful to be able to form this cluster +without any extra configuration in development mode, but this is unsuitable for +production because it's possible to form multiple clusters and lose data as a +result. + +This bootstrap check ensures that discovery is not running with the default +configuration. It can be satisfied by setting at least one of the following +properties: + +- `discovery.zen.hosts_provider` +- `discovery.zen.ping.unicast.hosts` +- `cluster.initial_master_nodes` From 43fdbd00f5f2e737c215c562af1ab4bd38ca1c51 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 7 Dec 2018 06:58:08 +0000 Subject: [PATCH 11/15] Do not start bootstrap service if already bootstrapped --- .../cluster/coordination/ClusterBootstrapService.java | 1 - .../org/elasticsearch/cluster/coordination/Coordinator.java | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java index 51206ff563e4b..410ec5b2c4371 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterBootstrapService.java @@ -179,7 +179,6 @@ public GetDiscoveredNodesResponse read(StreamInput in) throws IOException { } public void stop() { - assert running == true; running = false; } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index 4869ace49466d..3bf4ffab27ff7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -501,7 +501,9 @@ public void startInitialJoin() { becomeCandidate("startInitialJoin"); } - clusterBootstrapService.start(); + if (isInitialConfigurationSet() == false) { + clusterBootstrapService.start(); + } } @Override From b911f4fdc0be57a0ba9b3fcb4ed41f5d2f0da36b Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 7 Dec 2018 07:00:03 +0000 Subject: [PATCH 12/15] Only apply check in Zen2, and list the settings --- .../bootstrap/BootstrapChecks.java | 15 ++++++++++- .../bootstrap/BootstrapChecksTests.java | 25 ++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java index dcf787344720c..bfea1569aa0e3 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java @@ -26,6 +26,7 @@ import org.elasticsearch.cluster.coordination.ClusterBootstrapService; import org.elasticsearch.common.SuppressForbidden; import org.elasticsearch.common.io.PathUtils; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.transport.BoundTransportAddress; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.discovery.DiscoveryModule; @@ -47,6 +48,12 @@ import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING; +import static org.elasticsearch.discovery.DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING; +import static org.elasticsearch.discovery.zen.SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING; /** * We enforce bootstrap checks once a node has the transport protocol bound to a non-loopback interface or if the system property {@code @@ -718,10 +725,16 @@ boolean isAllPermissionGranted() { static class DiscoveryConfiguredCheck implements BootstrapCheck { @Override public BootstrapCheckResult check(BootstrapContext context) { + if (DiscoveryModule.ZEN2_DISCOVERY_TYPE.equals(DiscoveryModule.DISCOVERY_TYPE_SETTING.get(context.settings)) == false) { + return BootstrapCheckResult.success(); + } if (ClusterBootstrapService.discoveryIsConfigured(context.settings)) { return BootstrapCheckResult.success(); } - return BootstrapCheckResult.failure("the default discovery settings are unsuitable for production use"); + return BootstrapCheckResult.failure("the default discovery settings are unsuitable for production use; at least one of [" + + Stream.of(DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, DISCOVERY_HOSTS_PROVIDER_SETTING, INITIAL_MASTER_NODES_SETTING) + .map(Setting::getKey).collect(Collectors.joining(", ")) + + "] must be configured"); } } } diff --git a/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java b/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java index 23b286f28f854..b3ac4bc6157b7 100644 --- a/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java +++ b/server/src/test/java/org/elasticsearch/bootstrap/BootstrapChecksTests.java @@ -706,15 +706,28 @@ public boolean alwaysEnforce() { public void testDiscoveryConfiguredCheck() throws NodeValidationException { final List checks = Collections.singletonList(new BootstrapChecks.DiscoveryConfiguredCheck()); - final NodeValidationException e = expectThrows( - NodeValidationException.class, - () -> BootstrapChecks.check(defaultContext, true, checks)); - assertThat(e, hasToString(containsString("the default discovery settings are unsuitable for production use"))); - BootstrapChecks.check(defaultContext, false, checks); // not always enforced + final BootstrapContext zen2Context = new BootstrapContext(Settings.builder() + .put(DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey(), ZEN2_DISCOVERY_TYPE).build(), MetaData.EMPTY_META_DATA); + + // not always enforced + BootstrapChecks.check(zen2Context, false, checks); + + // not enforced for non-zen2 discovery + BootstrapChecks.check(new BootstrapContext(Settings.builder().put(DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey(), + randomFrom(ZEN_DISCOVERY_TYPE, "single-node", randomAlphaOfLength(5))).build(), MetaData.EMPTY_META_DATA), true, checks); + + final NodeValidationException e = expectThrows(NodeValidationException.class, + () -> BootstrapChecks.check(zen2Context, true, checks)); + assertThat(e, hasToString(containsString("the default discovery settings are unsuitable for production use; at least one " + + "of [discovery.zen.ping.unicast.hosts, discovery.zen.hosts_provider, cluster.initial_master_nodes] must be configured"))); CheckedConsumer ensureChecksPass = b -> - BootstrapChecks.check(new BootstrapContext(b.build(), MetaData.EMPTY_META_DATA), true, checks); + { + final BootstrapContext context = new BootstrapContext(b + .put(DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey(), ZEN2_DISCOVERY_TYPE).build(), MetaData.EMPTY_META_DATA); + BootstrapChecks.check(context, true, checks); + }; ensureChecksPass.accept(Settings.builder().putList(DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING.getKey())); ensureChecksPass.accept(Settings.builder().putList(SettingsBasedHostsProvider.DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING.getKey())); From 54fffaff727650d576648137e983f666c99143f9 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 7 Dec 2018 07:01:55 +0000 Subject: [PATCH 13/15] All the cool checks are using String.format --- .../org/elasticsearch/bootstrap/BootstrapChecks.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java index bfea1569aa0e3..22ee36039dd28 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/BootstrapChecks.java @@ -731,10 +731,12 @@ public BootstrapCheckResult check(BootstrapContext context) { if (ClusterBootstrapService.discoveryIsConfigured(context.settings)) { return BootstrapCheckResult.success(); } - return BootstrapCheckResult.failure("the default discovery settings are unsuitable for production use; at least one of [" - + Stream.of(DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, DISCOVERY_HOSTS_PROVIDER_SETTING, INITIAL_MASTER_NODES_SETTING) - .map(Setting::getKey).collect(Collectors.joining(", ")) - + "] must be configured"); + + return BootstrapCheckResult.failure(String.format( + Locale.ROOT, + "the default discovery settings are unsuitable for production use; at least one of [%s] must be configured", + Stream.of(DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING, DISCOVERY_HOSTS_PROVIDER_SETTING, INITIAL_MASTER_NODES_SETTING) + .map(Setting::getKey).collect(Collectors.joining(", ")))); } } } From 4a1e99f54b6e145b669c992e96256ed7a25d3818 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 7 Dec 2018 07:53:16 +0000 Subject: [PATCH 14/15] Add breaking changes note --- docs/reference/migration/migrate_7_0/cluster.asciidoc | 11 ++++++++++- docs/reference/setup/bootstrap-checks.asciidoc | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/reference/migration/migrate_7_0/cluster.asciidoc b/docs/reference/migration/migrate_7_0/cluster.asciidoc index 7343154175b22..732270706ff3d 100644 --- a/docs/reference/migration/migrate_7_0/cluster.asciidoc +++ b/docs/reference/migration/migrate_7_0/cluster.asciidoc @@ -24,4 +24,13 @@ These shard preferences are removed in favour of the `_prefer_nodes` and `_only_ Clusters now have soft limits on the total number of open shards in the cluster based on the number of nodes and the `cluster.max_shards_per_node` cluster setting, to prevent accidental operations that would destabilize the cluster. -More information can be found in the <>. \ No newline at end of file +More information can be found in the <>. + +[float] +==== Discovery configuration is required in production +Production deployments of Elasticsearch now require at least one of the following settings +to be specified in the `elasticsearch.yml` configuration file: + +- `discovery.zen.ping.unicast.hosts` +- `discovery.zen.hosts_provider` +- `cluster.initial_master_nodes` diff --git a/docs/reference/setup/bootstrap-checks.asciidoc b/docs/reference/setup/bootstrap-checks.asciidoc index 52a1ab38ebd74..9cf3620636a41 100644 --- a/docs/reference/setup/bootstrap-checks.asciidoc +++ b/docs/reference/setup/bootstrap-checks.asciidoc @@ -251,6 +251,6 @@ This bootstrap check ensures that discovery is not running with the default configuration. It can be satisfied by setting at least one of the following properties: -- `discovery.zen.hosts_provider` - `discovery.zen.ping.unicast.hosts` +- `discovery.zen.hosts_provider` - `cluster.initial_master_nodes` From 00ef7ea8a727feb36c5c0f865783c91bb9a3f82f Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 7 Dec 2018 08:57:11 +0000 Subject: [PATCH 15/15] Expand TODO --- qa/unconfigured-node-name/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/unconfigured-node-name/build.gradle b/qa/unconfigured-node-name/build.gradle index 3e41118964799..4df768b57a4dc 100644 --- a/qa/unconfigured-node-name/build.gradle +++ b/qa/unconfigured-node-name/build.gradle @@ -22,7 +22,7 @@ apply plugin: 'elasticsearch.rest-test' integTestCluster { setting 'node.name', null - // TODO: Run this using zen2 + // TODO: Run this using zen2, with no discovery configuration at all, demonstrating that the node forms a cluster on its own without help setting 'discovery.type', 'zen' }