[Feature][Connector-V2][Elasticsearch] Support interval-based writes, friendlier to real-time data sources #4347 #4349

Closed

wants to merge 8 commits
5 changes: 5 additions & 0 deletions docs/en/connector-v2/sink/Elasticsearch.md
@@ -30,6 +30,7 @@ Engine Supported
| password | string | no | |
| max_retry_count | int | no | 3 |
| max_batch_size | int | no | 10 |
| batch_interval_ms | int | no | 1000 |
| tls_verify_certificate | boolean | no | true |
| tls_verify_hostnames | boolean | no | true |
| tls_keystore_path | string | no | - |
@@ -75,6 +76,10 @@

one bulk request max try size

batch bulk doc max size

### batch_interval_ms [int]

batch flush interval in milliseconds
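For illustration, a sink block combining this option with the batch settings above might look like the following. The `hosts` and `index` values are placeholders, and those two option names are assumptions drawn from the rest of this connector's option table, not from this diff:

```hocon
sink {
  Elasticsearch {
    hosts = ["localhost:9200"]
    index = "seatunnel_test"
    max_batch_size = 10
    max_retry_count = 3
    # flush buffered docs every second even when max_batch_size is not reached
    batch_interval_ms = 1000
  }
}
```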

### tls_verify_certificate [boolean]

Enable certificates validation for HTTPS endpoints
4 changes: 2 additions & 2 deletions docs/en/faq.md
@@ -223,7 +223,7 @@ For example, if you want to set the JDK version to JDK8, there are two cases:
}
```
- Yarn cluster does not deploy JDK8. In this case, start SeaTunnel with JDK8 attached. For detailed operations, see the link below:
https://www.cnblogs.com/jasondan/p/spark-specific-jdk-version.html
[here](https://www.cnblogs.com/jasondan/p/spark-specific-jdk-version.html).

## What should I do if OOM always appears when running SeaTunnel in Spark local[*] mode?

@@ -336,7 +336,7 @@ spark-submit --verbose

## How to use SeaTunnel to synchronize data across HDFS clusters?

Just configure hdfs-site.xml properly, refer to: https://www.cnblogs.com/suanec/p/7828139.html
Just configure hdfs-site.xml properly, refer to: [here](https://www.cnblogs.com/suanec/p/7828139.html)

## I want to learn the source code of SeaTunnel, where should I start?

@@ -58,6 +58,13 @@ public class SinkConfig {
.defaultValue(10)
.withDescription("batch bulk doc max size");

@SuppressWarnings("checkstyle:MagicNumber")
public static final Option<Integer> BATCH_INTERVAL_MS =
Options.key("batch_interval_ms")
.intType()
.defaultValue(1000)
Contributor: Would it be better to set the defaultValue to 0?

Contributor Author:

> Would it be better to set the defaultValue to 0?

For this setting I referred to Flink; I think it is a little more user-friendly.

Member: Yes, zero is better: it means the feature is off. Only when batch_interval_ms is greater than zero should the ScheduledTask be created.

Contributor Author (@zhilinli123, Mar 22, 2023): You mean I should set batch_interval_ms to 0 by default? @Hisoka-X

Member: Yes, 0 means the feature is off by default. If users want to enable it, they just configure it.

Contributor Author: Done.

Contributor Author: Done. I have resubmitted with the feature disabled by default; the user must configure it explicitly to enable it. @Hisoka-X
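The semantics the reviewers converged on (0 disables interval flushing; only a positive value starts the scheduled task) can be sketched as a small guard. The class and method names below are invented for illustration and are not part of the PR:

```java
// Hypothetical guard mirroring the review outcome: batch_interval_ms defaults
// to 0, which keeps interval flushing off; only a positive value enables it.
public class BatchIntervalGuard {
    public static boolean intervalFlushEnabled(int batchIntervalMs) {
        if (batchIntervalMs < 0) {
            // Mirrors the connector's parameter validation: negatives rejected.
            throw new IllegalArgumentException("batch_interval_ms must be >= 0");
        }
        return batchIntervalMs > 0; // 0 means the feature stays disabled
    }

    public static void main(String[] args) {
        System.out.println(intervalFlushEnabled(0));    // feature off by default
        System.out.println(intervalFlushEnabled(1000)); // periodic flush enabled
    }
}
```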

.withDescription("batch flush interval in milliseconds");

@SuppressWarnings("checkstyle:MagicNumber")
public static final Option<Integer> MAX_RETRY_COUNT =
Options.key("max_retry_count")
@@ -25,6 +25,8 @@
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.common.exception.CommonErrorCode;
import org.apache.seatunnel.connectors.seatunnel.elasticsearch.exception.ElasticsearchConnectorException;
import org.apache.seatunnel.connectors.seatunnel.elasticsearch.state.ElasticsearchAggregatedCommitInfo;
import org.apache.seatunnel.connectors.seatunnel.elasticsearch.state.ElasticsearchCommitInfo;
import org.apache.seatunnel.connectors.seatunnel.elasticsearch.state.ElasticsearchSinkState;
@@ -33,6 +35,7 @@

import java.util.Collections;

import static org.apache.seatunnel.connectors.seatunnel.elasticsearch.config.SinkConfig.BATCH_INTERVAL_MS;
import static org.apache.seatunnel.connectors.seatunnel.elasticsearch.config.SinkConfig.MAX_BATCH_SIZE;
import static org.apache.seatunnel.connectors.seatunnel.elasticsearch.config.SinkConfig.MAX_RETRY_COUNT;

@@ -51,6 +54,8 @@ public class ElasticsearchSink

private int maxRetryCount = MAX_RETRY_COUNT.defaultValue();

private int batchIntervalMs = BATCH_INTERVAL_MS.defaultValue();
Member suggested change:

- private int batchIntervalMs = BATCH_INTERVAL_MS.defaultValue();
+ private Integer batchIntervalMs = BATCH_INTERVAL_MS.defaultValue();


@Override
public String getPluginName() {
return "Elasticsearch";
@@ -65,6 +70,16 @@ public void prepare(Config pluginConfig) throws PrepareFailException {
if (pluginConfig.hasPath(MAX_RETRY_COUNT.key())) {
maxRetryCount = pluginConfig.getInt(MAX_RETRY_COUNT.key());
}
if (pluginConfig.hasPath(BATCH_INTERVAL_MS.key())) {
batchIntervalMs = pluginConfig.getInt(BATCH_INTERVAL_MS.key());
}
        if (maxBatchSize < 0 || maxRetryCount < 0 || batchIntervalMs < 0) {
            throw new ElasticsearchConnectorException(
                    CommonErrorCode.ILLEGAL_ARGUMENT,
                    "Invalid parameter: max_batch_size, batch_interval_ms and "
                            + "max_retry_count must be non-negative integers");
        }
}

@Override
@@ -86,6 +101,7 @@ public SinkWriter<SeaTunnelRow, ElasticsearchCommitInfo, ElasticsearchSinkState>
pluginConfig,
maxBatchSize,
maxRetryCount,
batchIntervalMs,
Collections.emptyList());
}
}
@@ -42,6 +42,11 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

/**
* ElasticsearchSinkWriter is a sink writer that will write {@link SeaTunnelRow} to Elasticsearch.
@@ -54,21 +59,29 @@ public class ElasticsearchSinkWriter

private final int maxBatchSize;

private final int batchIntervalMs;

private final SeaTunnelRowSerializer seaTunnelRowSerializer;
private final List<String> requestEsList;
private EsRestClient esRestClient;
private RetryMaterial retryMaterial;
private static final long DEFAULT_SLEEP_TIME_MS = 200L;

private transient ScheduledExecutorService scheduler;
private transient ScheduledFuture<?> scheduledFuture;
private transient boolean isClose;

public ElasticsearchSinkWriter(
SinkWriter.Context context,
SeaTunnelRowType seaTunnelRowType,
Config pluginConfig,
int maxBatchSize,
int maxRetryCount,
int batchIntervalMs,
List<ElasticsearchSinkState> elasticsearchStates) {
this.context = context;
this.maxBatchSize = maxBatchSize;
this.batchIntervalMs = batchIntervalMs;

IndexInfo indexInfo = new IndexInfo(pluginConfig);
esRestClient = EsRestClient.createInstance(pluginConfig);
@@ -79,6 +92,8 @@ public ElasticsearchSinkWriter(
this.requestEsList = new ArrayList<>(maxBatchSize);
this.retryMaterial =
new RetryMaterial(maxRetryCount, true, exception -> true, DEFAULT_SLEEP_TIME_MS);
// Initialize the interval flush
open();
}

@Override
@@ -94,6 +109,32 @@ public void write(SeaTunnelRow element) {
}
}

public void open() {
Member suggested change:

- public void open() {
+ public void startScheduler() {

Contributor Author: Please review. @hailin0

        // Hoisted out of the ThreadFactory lambda: creating the counter inside
        // newThread() would reset it to 0 on every call, so the name suffix
        // would always be 1.
        AtomicInteger cnt = new AtomicInteger(0);
        this.scheduler =
                Executors.newScheduledThreadPool(
                        1,
                        runnable -> {
                            Thread thread = new Thread(runnable);
                            thread.setDaemon(true);
                            thread.setName(
                                    "sink-elasticsearch-interval" + "-" + cnt.incrementAndGet());
                            return thread;
                        });
this.scheduledFuture =
this.scheduler.scheduleWithFixedDelay(
() -> {
synchronized (ElasticsearchSinkWriter.this) {
if (requestEsList.size() > 0 && !isClose) {
bulkEsWithRetry(this.esRestClient, this.requestEsList);
}
}
},
this.batchIntervalMs,
this.batchIntervalMs,
TimeUnit.MILLISECONDS);
}
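As a standalone sketch of the same pattern (all names here are invented; the real writer runs bulkEsWithRetry instead of counting down a latch), a single daemon thread scheduled with a fixed delay can be exercised like this:

```java
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

// Minimal sketch of the writer's periodic flush: one daemon thread fires a
// flush task at a fixed delay until the scheduler is shut down.
public class FixedDelayFlushDemo {
    public static boolean runDemo() throws InterruptedException {
        CountDownLatch threeTicks = new CountDownLatch(3);
        ScheduledExecutorService scheduler =
                Executors.newScheduledThreadPool(
                        1,
                        runnable -> {
                            Thread t = new Thread(runnable);
                            t.setDaemon(true); // do not keep the JVM alive
                            t.setName("sink-elasticsearch-interval-demo");
                            return t;
                        });
        scheduler.scheduleWithFixedDelay(
                threeTicks::countDown, // stand-in for the real bulk flush
                10, 10, TimeUnit.MILLISECONDS);
        // Wait for three flush ticks, then stop the scheduler.
        boolean flushed = threeTicks.await(2, TimeUnit.SECONDS);
        scheduler.shutdown();
        return flushed;
    }

    public static void main(String[] args) throws InterruptedException {
        System.out.println(runDemo() ? "flushed" : "timed out");
    }
}
```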

@Override
public Optional<ElasticsearchCommitInfo> prepareCommit() {
bulkEsWithRetry(this.esRestClient, this.requestEsList);
@@ -116,6 +157,9 @@ public synchronized void bulkEsWithRetry(
ElasticsearchConnectorErrorCode.BULK_RESPONSE_ERROR,
"bulk es error: " + bulkResponse.getResponse());
}
Contributor: I would support not introducing a ScheduledThreadPool, but instead running Thread.sleep(batchIntervalMs) after the bulk request.

Contributor Author: I think the scheduler is a bit more write-friendly for real-time programs; even when the consumed data is less than the current batch, it still gets flushed to ES. CC @Hisoka-X

Member: There doesn't seem to be a better way to flush the data without a ScheduledTask. Running Thread.sleep(batchIntervalMs) after the bulk request would make the data flush slower, not faster.

                    log.info(
                            "bulk es successfully written to the rowNum: "
                                    + requestEsList.size());
return bulkResponse;
}
return null;
@@ -133,6 +177,12 @@
@Override
public void close() throws IOException {
        bulkEsWithRetry(this.esRestClient, this.requestEsList);
        this.isClose = true;
        // Close the client once, after the final flush; the unguarded
        // duplicate close() call before the null check is removed.
        if (esRestClient != null) {
            esRestClient.close();
        }
        if (this.scheduler != null) {
            this.scheduler.shutdown();
        }
}
}