Implement follower rate limiting for file restore #37449

Merged · 9 commits · Jan 17, 2019
@@ -7,6 +7,8 @@

import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.xpack.core.XPackSettings;

@@ -35,6 +37,14 @@ private CcrSettings() {
public static final Setting<TimeValue> CCR_AUTO_FOLLOW_WAIT_FOR_METADATA_TIMEOUT = Setting.timeSetting(
"ccr.auto_follow.wait_for_metadata_timeout", TimeValue.timeValueSeconds(60), Property.NodeScope, Property.Dynamic);


/**
* Max bytes a follower node can recover per second.
*/
public static final Setting<ByteSizeValue> FOLLOWER_RECOVERY_MAX_BYTES_READ_PER_SECOND =
Setting.byteSizeSetting("ccr.follower.recovery.max_bytes_per_sec", new ByteSizeValue(40, ByteSizeUnit.MB),
Contributor: let's use the same setting for rate-limiting inbound as well as outbound CCR recovery traffic. This means removing the "follower" part from the setting name. Perhaps ccr.indices.recovery.max_bytes_per_sec.

Contributor: let's also add a bullet point to the meta issue to document the newly introduced settings.

Setting.Property.Dynamic, Setting.Property.NodeScope);

/**
* The settings defined by CCR.
*
@@ -44,6 +54,7 @@ static List<Setting<?>> getSettings() {
return Arrays.asList(
XPackSettings.CCR_ENABLED_SETTING,
CCR_FOLLOWING_INDEX_SETTING,
FOLLOWER_RECOVERY_MAX_BYTES_READ_PER_SECOND,
CCR_AUTO_FOLLOW_WAIT_FOR_METADATA_TIMEOUT);
}

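Note on consuming the setting above: because it is declared with Property.Dynamic, the review discussion further down suggests registering a settings update consumer so that changes made through the cluster settings API take effect without a node restart (the same pattern RecoverySettings uses for peer recovery). The following is a minimal sketch under that assumption; the class name and wiring are illustrative, not the PR's actual code.

// Illustrative sketch only, not part of the PR.
import org.apache.lucene.store.RateLimiter;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.xpack.ccr.CcrSettings;

public final class CcrRestoreThrottleSketch {

    private final RateLimiter.SimpleRateLimiter rateLimiter;

    public CcrRestoreThrottleSketch(Settings settings, ClusterSettings clusterSettings) {
        // Seed the limiter from the node settings at startup (MB/sec).
        this.rateLimiter = new RateLimiter.SimpleRateLimiter(
            CcrSettings.FOLLOWER_RECOVERY_MAX_BYTES_READ_PER_SECOND.get(settings).getMbFrac());
        // Property.Dynamic settings can be updated via the cluster settings API;
        // the registered consumer is invoked with each new value.
        clusterSettings.addSettingsUpdateConsumer(
            CcrSettings.FOLLOWER_RECOVERY_MAX_BYTES_READ_PER_SECOND, this::setMaxBytesPerSec);
    }

    private void setMaxBytesPerSec(ByteSizeValue maxBytesPerSec) {
        rateLimiter.setMBPerSec(maxBytesPerSec.getMbFrac());
    }

    public RateLimiter.SimpleRateLimiter rateLimiter() {
        return rateLimiter;
    }
}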
@@ -7,6 +7,7 @@
package org.elasticsearch.xpack.ccr.repository;

import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.RateLimiter;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest;
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
@@ -24,6 +25,7 @@
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.metrics.CounterMetric;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.index.Index;
@@ -66,6 +68,9 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;

import static org.elasticsearch.xpack.ccr.CcrSettings.FOLLOWER_RECOVERY_MAX_BYTES_READ_PER_SECOND;

/**
* This repository relies on a remote cluster for Ccr restores. It is read-only so it can only be used to
@@ -83,13 +88,18 @@ public class CcrRepository extends AbstractLifecycleComponent implements Reposit
private final Client client;
private final CcrLicenseChecker ccrLicenseChecker;

private final RateLimiter.SimpleRateLimiter rateLimiter;
private final CounterMetric throttledTime = new CounterMetric();
private final AtomicLong bytesSinceLastPause = new AtomicLong();

public CcrRepository(RepositoryMetaData metadata, Client client, CcrLicenseChecker ccrLicenseChecker, Settings settings) {
super(settings);
this.metadata = metadata;
assert metadata.name().startsWith(NAME_PREFIX) : "CcrRepository metadata.name() must start with: " + NAME_PREFIX;
this.remoteClusterAlias = Strings.split(metadata.name(), NAME_PREFIX)[1];
this.ccrLicenseChecker = ccrLicenseChecker;
this.client = client;
this.rateLimiter = new RateLimiter.SimpleRateLimiter(FOLLOWER_RECOVERY_MAX_BYTES_READ_PER_SECOND.get(settings).getMbFrac());
Contributor: This setting is dynamic, so let's allow it to be adjusted at runtime by registering a settings update consumer (see RecoverySettings for how it's done). We can use the CcrSettings object to store the latest value, similar to the RecoverySettings object. You can create the CcrSettings instance in the Ccr class and do the registration with ClusterSettings in createComponents, which can be accessed from clusterService.getClusterSettings().

Contributor (author): I registered a listener for the setting change.

}

@Override
@@ -207,7 +217,7 @@ public long getSnapshotThrottleTimeInNanos() {

@Override
public long getRestoreThrottleTimeInNanos() {
return 0;
return throttledTime.count();
}

@Override
@@ -258,7 +268,7 @@ public void restoreShard(IndexShard indexShard, SnapshotId snapshotId, Version v
// TODO: There should be some local timeout. And if the remote cluster returns an unknown session
// response, we should be able to retry by creating a new session.
String name = metadata.name();
try (RestoreSession restoreSession = RestoreSession.openSession(name, remoteClient, leaderShardId, indexShard, recoveryState)) {
try (RestoreSession restoreSession = openSession(name, remoteClient, leaderShardId, indexShard, recoveryState)) {
restoreSession.restoreFiles();
} catch (Exception e) {
throw new IndexShardRestoreFailedException(indexShard.shardId(), "failed to restore snapshot [" + snapshotId + "]", e);
@@ -286,7 +296,16 @@ private void maybeUpdateMappings(Client localClient, Client remoteClient, Index
}
}

private static class RestoreSession extends FileRestoreContext implements Closeable {
private RestoreSession openSession(String repositoryName, Client remoteClient, ShardId leaderShardId, IndexShard indexShard,
RecoveryState recoveryState) {
String sessionUUID = UUIDs.randomBase64UUID();
PutCcrRestoreSessionAction.PutCcrRestoreSessionResponse response = remoteClient.execute(PutCcrRestoreSessionAction.INSTANCE,
new PutCcrRestoreSessionRequest(sessionUUID, leaderShardId)).actionGet();
return new RestoreSession(repositoryName, remoteClient, sessionUUID, response.getNode(), indexShard, recoveryState,
response.getStoreFileMetaData());
}

private class RestoreSession extends FileRestoreContext implements Closeable {

private static final int BUFFER_SIZE = 1 << 16;

@@ -304,15 +323,6 @@ private static class RestoreSession extends FileRestoreContext implements Closea
this.sourceMetaData = sourceMetaData;
}

static RestoreSession openSession(String repositoryName, Client remoteClient, ShardId leaderShardId, IndexShard indexShard,
RecoveryState recoveryState) {
String sessionUUID = UUIDs.randomBase64UUID();
PutCcrRestoreSessionAction.PutCcrRestoreSessionResponse response = remoteClient.execute(PutCcrRestoreSessionAction.INSTANCE,
new PutCcrRestoreSessionRequest(sessionUUID, leaderShardId)).actionGet();
return new RestoreSession(repositoryName, remoteClient, sessionUUID, response.getNode(), indexShard, recoveryState,
response.getStoreFileMetaData());
}

void restoreFiles() throws IOException {
ArrayList<BlobStoreIndexShardSnapshot.FileInfo> fileInfos = new ArrayList<>();
for (StoreFileMetaData fileMetaData : sourceMetaData) {
@@ -336,7 +346,7 @@ public void close() {
}
}

private static class RestoreFileInputStream extends InputStream {
Contributor: why remove the static here?

Contributor (author): In order to access the rate limiter field. We also need to add an AtomicLong field for bytes written that needs to be shared with the input streams.

private class RestoreFileInputStream extends InputStream {

private final Client remoteClient;
private final String sessionUUID;
@@ -366,6 +376,9 @@ public int read(byte[] bytes, int off, int len) throws IOException {
}

int bytesRequested = (int) Math.min(remainingBytes, len);

maybePause(bytesRequested);

String fileName = fileToRecover.name();
GetCcrRestoreFileChunkRequest request = new GetCcrRestoreFileChunkRequest(node, sessionUUID, fileName, bytesRequested);
GetCcrRestoreFileChunkAction.GetCcrRestoreFileChunkResponse response =
@@ -389,5 +402,17 @@ public int read(byte[] bytes, int off, int len) throws IOException {

return bytesReceived;
}

private void maybePause(int bytesRequested) {
Contributor: just to make sure, this is the same code as in PeerRecoveryTargetService. Let's have a follow-up to refactor it into a class that is reusable, and use it everywhere (peer recovery source, target as well as snapshot + restore).

Contributor (author): I did have to extract a CombinedRateLimiter. Otherwise the different repositories for different remote clusters will not share the rate limiter. However, I added a meta issue to ensure that we use the same code base for this and peer recovery. So we can work out the issues in that follow-up?

Contributor: ok, sounds good.

long bytesSincePause = bytesSinceLastPause.addAndGet(bytesRequested);
if (bytesSincePause > rateLimiter.getMinPauseCheckBytes()) {
// Time to pause
bytesSinceLastPause.addAndGet(-bytesSincePause);
long throttleTimeInNanos = rateLimiter.pause(bytesSincePause);
if (throttleTimeInNanos > 0) {
throttledTime.inc(throttleTimeInNanos);
}
}
}
}
}
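In the thread above the author mentions extracting a CombinedRateLimiter so that the repositories created for different remote clusters share one limiter instead of throttling independently. A minimal sketch of what such shared pause accounting could look like follows; the class and method names are assumptions for illustration, not the PR's final implementation. Each RestoreFileInputStream would then call maybePause(bytesRequested) before fetching a chunk and add any returned pause time to its throttle metric.

// Illustrative sketch only, not part of the PR.
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.store.RateLimiter;

public final class CombinedRateLimiterSketch {

    private final RateLimiter.SimpleRateLimiter rateLimiter;
    // Shared across all streams so the limit applies to their combined throughput.
    private final AtomicLong bytesSinceLastPause = new AtomicLong();

    public CombinedRateLimiterSketch(double mbPerSec) {
        this.rateLimiter = new RateLimiter.SimpleRateLimiter(mbPerSec);
    }

    // Records bytes read by any stream and pauses once enough bytes have accumulated.
    // Returns the nanoseconds spent paused (0 if no pause was needed).
    public long maybePause(int bytesRequested) {
        long bytesSincePause = bytesSinceLastPause.addAndGet(bytesRequested);
        if (bytesSincePause > rateLimiter.getMinPauseCheckBytes()) {
            // Reset the shared counter before pausing so concurrent streams keep accumulating.
            bytesSinceLastPause.addAndGet(-bytesSincePause);
            return rateLimiter.pause(bytesSincePause);
        }
        return 0;
    }

    // Applies a new limit, e.g. from a dynamic settings update.
    public void setMBPerSec(double mbPerSec) {
        rateLimiter.setMBPerSec(mbPerSec);
    }
}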
@@ -111,6 +111,10 @@ public abstract class CcrIntegTestCase extends ESTestCase {

@Before
public final void startClusters() throws Exception {
startClusters(Settings.EMPTY);
}

private void startClusters(Settings additionalSettings) throws Exception {
if (clusterGroup != null && reuseClusters()) {
clusterGroup.leaderCluster.ensureAtMostNumDataNodes(numberOfNodesPerCluster());
clusterGroup.followerCluster.ensureAtMostNumDataNodes(numberOfNodesPerCluster());
@@ -122,8 +126,8 @@ public final void startClusters() throws Exception {
TestZenDiscovery.TestPlugin.class, MockHttpTransport.TestPlugin.class, getTestTransportPlugin());

InternalTestCluster leaderCluster = new InternalTestCluster(randomLong(), createTempDir(), true, true, numberOfNodesPerCluster(),
numberOfNodesPerCluster(), UUIDs.randomBase64UUID(random()), createNodeConfigurationSource(null), 0, "leader", mockPlugins,
Function.identity());
numberOfNodesPerCluster(), UUIDs.randomBase64UUID(random()), createNodeConfigurationSource(null, additionalSettings), 0,
"leader", mockPlugins, Function.identity());
leaderCluster.beforeTest(random(), 0.0D);
leaderCluster.ensureAtLeastNumDataNodes(numberOfNodesPerCluster());
assertBusy(() -> {
Expand All @@ -133,8 +137,8 @@ public final void startClusters() throws Exception {

String address = leaderCluster.getDataNodeInstance(TransportService.class).boundAddress().publishAddress().toString();
InternalTestCluster followerCluster = new InternalTestCluster(randomLong(), createTempDir(), true, true, numberOfNodesPerCluster(),
numberOfNodesPerCluster(), UUIDs.randomBase64UUID(random()), createNodeConfigurationSource(address), 0, "follower",
mockPlugins, Function.identity());
numberOfNodesPerCluster(), UUIDs.randomBase64UUID(random()), createNodeConfigurationSource(address, additionalSettings), 0,
"follower", mockPlugins, Function.identity());
clusterGroup = new ClusterGroup(leaderCluster, followerCluster);

followerCluster.beforeTest(random(), 0.0D);
@@ -145,6 +149,11 @@
});
}

protected void restartClustersWithSettings(Settings settings) throws Exception {
stopClusters();
startClusters(settings);
}

/**
* Follower indices don't get all the settings from leader, for example 'index.unassigned.node_left.delayed_timeout'
* is not replicated and if tests kill nodes, we have to wait 60s by default...
@@ -180,7 +189,7 @@ public void afterTest() throws Exception {
}
}

private NodeConfigurationSource createNodeConfigurationSource(String leaderSeedAddress) {
private NodeConfigurationSource createNodeConfigurationSource(String leaderSeedAddress, Settings settings) {
Settings.Builder builder = Settings.builder();
builder.put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), Integer.MAX_VALUE);
// Default the watermarks to absurdly low to prevent the tests
Expand All @@ -202,6 +211,7 @@ private NodeConfigurationSource createNodeConfigurationSource(String leaderSeedA
builder.put(LicenseService.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial");
// Let cluster state api return quickly in order to speed up auto follow tests:
builder.put(CcrSettings.CCR_AUTO_FOLLOW_WAIT_FOR_METADATA_TIMEOUT.getKey(), TimeValue.timeValueMillis(100));
builder.put(settings);
if (configureRemoteClusterViaNodeSettings() && leaderSeedAddress != null) {
builder.put("cluster.remote.leader_cluster.seeds", leaderSeedAddress);
}
@@ -21,6 +21,7 @@
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
@@ -38,6 +39,8 @@
import org.elasticsearch.xpack.ccr.repository.CcrRestoreSourceService;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@@ -235,6 +238,60 @@ public void testDocsAreRecovered() throws Exception {
thread.join();
}

public void testRateLimitingIsEmployed() throws Exception {
restartClustersWithSettings(Settings.builder().put(CcrSettings.FOLLOWER_RECOVERY_MAX_BYTES_READ_PER_SECOND.getKey(),
Contributor: by making the setting truly dynamic, we won't need the restarts here, which will greatly speed up the tests. See SharedClusterSnapshotRestoreIT#testThrottling for how it's tested for snapshot/restore.

Contributor (author): I did the same setup and assertion as that test.

new ByteSizeValue(500)).build());
String leaderClusterRepoName = CcrRepository.NAME_PREFIX + "leader_cluster";
String leaderIndex = "index1";
String followerIndex = "index2";

final int numberOfPrimaryShards = randomIntBetween(1, 3);
final String leaderIndexSettings = getIndexSettings(numberOfPrimaryShards, between(0, 1),
singletonMap(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), "true"));
assertAcked(leaderClient().admin().indices().prepareCreate(leaderIndex).setSource(leaderIndexSettings, XContentType.JSON));
ensureLeaderGreen(leaderIndex);

final RestoreService restoreService = getFollowerCluster().getCurrentMasterNodeInstance(RestoreService.class);
final ClusterService clusterService = getFollowerCluster().getCurrentMasterNodeInstance(ClusterService.class);

List<CcrRepository> repositories = new ArrayList<>();
try {
for (RepositoriesService repositoriesService : getFollowerCluster().getDataOrMasterNodeInstances(RepositoriesService.class)) {
Repository repository = repositoriesService.repository(leaderClusterRepoName);
repositories.add((CcrRepository) repository);
}
} catch (RepositoryMissingException e) {
fail("need repository");
}

final int firstBatchNumDocs = 10;
logger.info("Indexing [{}] docs as first batch", firstBatchNumDocs);
for (int i = 0; i < firstBatchNumDocs; i++) {
final String source = String.format(Locale.ROOT, "{\"f\":%d}", i);
leaderClient().prepareIndex("index1", "doc", Integer.toString(i)).setSource(source, XContentType.JSON).get();
}

leaderClient().admin().indices().prepareFlush(leaderIndex).setForce(true).setWaitIfOngoing(true).get();

try {
Settings.Builder settingsBuilder = Settings.builder()
.put(IndexMetaData.SETTING_INDEX_PROVIDED_NAME, followerIndex)
.put(CcrSettings.CCR_FOLLOWING_INDEX_SETTING.getKey(), true);
RestoreService.RestoreRequest restoreRequest = new RestoreService.RestoreRequest(leaderClusterRepoName,
CcrRepository.LATEST, new String[]{leaderIndex}, indicesOptions,
"^(.*)$", followerIndex, Settings.EMPTY, new TimeValue(1, TimeUnit.HOURS), false,
false, true, settingsBuilder.build(), new String[0],
"restore_snapshot[" + leaderClusterRepoName + ":" + leaderIndex + "]");

PlainActionFuture<RestoreInfo> future = PlainActionFuture.newFuture();
restoreService.restoreSnapshot(restoreRequest, waitForRestore(clusterService, future));

assertBusy(() -> assertTrue(repositories.stream().anyMatch(cr -> cr.getRestoreThrottleTimeInNanos() > 0)));
} finally {
restartClustersWithSettings(Settings.EMPTY);
}
}
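Following the reviewer's suggestion above, once the limit is a fully dynamic cluster setting the test could set and clear it through the cluster settings API instead of restarting both clusters. A rough sketch of that approach, assuming a followerClient() helper analogous to the leaderClient() used above (the exact test wiring is an assumption, not this PR's code):

// Illustrative sketch only: apply the limit on the follower cluster at runtime ...
assertAcked(followerClient().admin().cluster().prepareUpdateSettings()
    .setTransientSettings(Settings.builder()
        .put(CcrSettings.FOLLOWER_RECOVERY_MAX_BYTES_READ_PER_SECOND.getKey(), new ByteSizeValue(500)))
    .get());

// ... run the restore and the assertBusy throttle-time assertion as in testRateLimitingIsEmployed ...

// ... and in the finally block, clear the transient value rather than restarting the clusters.
assertAcked(followerClient().admin().cluster().prepareUpdateSettings()
    .setTransientSettings(Settings.builder()
        .putNull(CcrSettings.FOLLOWER_RECOVERY_MAX_BYTES_READ_PER_SECOND.getKey()))
    .get());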

public void testFollowerMappingIsUpdated() throws IOException {
String leaderClusterRepoName = CcrRepository.NAME_PREFIX + "leader_cluster";
String leaderIndex = "index1";