elastic · jpountz · Mar 30, 2016 · Mar 30, 2016
diff --git a/core/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java b/core/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java
@@ -107,7 +107,16 @@ protected AbstractSearchAsyncAction(ESLogger logger, SearchTransportService sear
             request.indices());
 
         shardsIts = clusterService.operationRouting().searchShards(clusterState, concreteIndices, routingMap, request.preference());
-        expectedSuccessfulOps = shardsIts.size();
+        final int shardCount = shardsIts.size();
+        final long shardCountLimit = clusterService.getClusterSettings().get(TransportSearchAction.SHARD_COUNT_LIMIT_SETTING);
+        if (shardCount > shardCountLimit) {
+            throw new IllegalArgumentException("Trying to query " + shardCount + " shards, which is over the limit of "
+                    + shardCountLimit + ". This limit exists because querying many shards at the same time can make the "
+                    + "job of the coordinating node very CPU and/or memory intensive. It is usually a better idea to "
+                    + "have a smaller number of larger shards. Update [" + TransportSearchAction.SHARD_COUNT_LIMIT_SETTING.getKey()
+                    + "] to a greater value if you really want to query that many shards at the same time.");
+        }
+        expectedSuccessfulOps = shardCount;
         // we need to add 1 for non active partition, since we count it in the total!
         expectedTotalOps = shardsIts.totalSizeWith1ForEmpty();
 

diff --git a/core/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/core/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java
@@ -26,6 +26,8 @@
 import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.settings.Setting;
+import org.elasticsearch.common.settings.Setting.Property;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexNotFoundException;
 import org.elasticsearch.indices.IndexClosedException;
@@ -45,6 +47,10 @@
  */
 public class TransportSearchAction extends HandledTransportAction<SearchRequest, SearchResponse> {
 
+    /** The maximum number of shards for a single search request. */
+    public static final Setting<Long> SHARD_COUNT_LIMIT_SETTING = Setting.longSetting(
+            "action.search.shard_count.limit", 1000L, 1L, Property.Dynamic, Property.NodeScope);
+
     private final ClusterService clusterService;
     private final SearchTransportService searchTransportService;
     private final SearchPhaseController searchPhaseController;

diff --git a/core/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/core/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java
@@ -19,6 +19,7 @@
 package org.elasticsearch.common.settings;
 
 import org.elasticsearch.action.admin.indices.close.TransportCloseIndexAction;
+import org.elasticsearch.action.search.TransportSearchAction;
 import org.elasticsearch.action.support.AutoCreateIndex;
 import org.elasticsearch.action.support.DestructiveOperations;
 import org.elasticsearch.action.support.master.TransportMasterNodeReadAction;
@@ -258,6 +259,7 @@ public void apply(Settings value, Settings current, Settings previous) {
                     ClusterService.CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING,
                     SearchService.DEFAULT_SEARCH_TIMEOUT_SETTING,
                     ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING,
+                    TransportSearchAction.SHARD_COUNT_LIMIT_SETTING,
                     TransportService.TRACE_LOG_EXCLUDE_SETTING,
                     TransportService.TRACE_LOG_INCLUDE_SETTING,
                     TransportCloseIndexAction.CLUSTER_INDICES_CLOSE_ENABLE_SETTING,

diff --git a/core/src/test/java/org/elasticsearch/action/search/TransportSearchIT.java b/core/src/test/java/org/elasticsearch/action/search/TransportSearchIT.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.search;
+
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.test.ESIntegTestCase;
+
+import java.util.Collections;
+
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.hamcrest.Matchers.containsString;
+
+public class TransportSearchIT extends ESIntegTestCase {
+
+    public void testShardCountLimit() throws Exception {
+        try {
+            final int numPrimaries1 = randomIntBetween(2, 10);
+            final int numPrimaries2 = randomIntBetween(1, 10);
+            assertAcked(prepareCreate("test1")
+                    .setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, numPrimaries1));
+            assertAcked(prepareCreate("test2")
+                    .setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, numPrimaries2));
+            ensureYellow("test1", "test2");
+
+            // no exception
+            client().prepareSearch("test1").get();
+
+            assertAcked(client().admin().cluster().prepareUpdateSettings()
+                    .setTransientSettings(Collections.singletonMap(
+                            TransportSearchAction.SHARD_COUNT_LIMIT_SETTING.getKey(), numPrimaries1 - 1)));
+
+            IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+                    () -> client().prepareSearch("test1").get());
+            assertThat(e.getMessage(), containsString("Trying to query " + numPrimaries1
+                    + " shards, which is over the limit of " + (numPrimaries1 - 1)));
+
+            assertAcked(client().admin().cluster().prepareUpdateSettings()
+                    .setTransientSettings(Collections.singletonMap(
+                            TransportSearchAction.SHARD_COUNT_LIMIT_SETTING.getKey(), numPrimaries1)));
+
+            // no exception
+            client().prepareSearch("test1").get();
+
+            e = expectThrows(IllegalArgumentException.class,
+                    () -> client().prepareSearch("test1", "test2").get());
+            assertThat(e.getMessage(), containsString("Trying to query " + (numPrimaries1 + numPrimaries2)
+                    + " shards, which is over the limit of " + numPrimaries1));
+
+        } finally {
+            assertAcked(client().admin().cluster().prepareUpdateSettings()
+                    .setTransientSettings(Collections.singletonMap(
+                            TransportSearchAction.SHARD_COUNT_LIMIT_SETTING.getKey(), null)));
+        }
+    }
+
+}
diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc
@@ -49,3 +49,10 @@ Or even search across all indices and all types:
 --------------------------------------------------
 $ curl -XGET 'http://localhost:9200/_search?q=tag:wow'
 --------------------------------------------------
+
+By default elasticsearch rejects search requests that would query more than
+1000 shards. The reason is that such large numbers of shards make the job of
+the coordinating node very CPU and memory intensive. It is usually a better
+idea to organize data in such a way that there are fewer larger shards. In
+case you would like to bypass this limit, which is discouraged, you can update
+the `action.search.shard_count.limit` cluster setting to a greater value.