From baa7cb89e40c55159414cc4d4c9c6ad5b2875d82 Mon Sep 17 00:00:00 2001
From: "YOMO.LEE" <75362129@qq.com>
Date: Tue, 22 Oct 2024 17:28:28 +0800
Subject: [PATCH 01/17] [Fix][Doc] Fix LocalFile doc (#7887)

Supplement and optimize the description of how the LocalFile connector filters files [(#7887)](https://github.com/apache/seatunnel/issues/7887)

---
 docs/en/connector-v2/source/LocalFile.md | 29 ++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md
index 6d11b992e3a..533d7fa91bf 100644
--- a/docs/en/connector-v2/source/LocalFile.md
+++ b/docs/en/connector-v2/source/LocalFile.md
@@ -254,6 +254,12 @@ Specifies Whether to process data using the tag attribute format.

 Filter pattern, which used for filtering files.

+The filtering format is similar to wildcard matching file names in Linux.
+
+However, it should be noted that unlike Linux wildcard characters, when encountering file suffixes, the middle dot cannot be omitted.
+
+For example, `abc20241022.csv`, the normal Linux wildcard `abc*` is sufficient, but here we need to use `abc*.*` , Pay attention to a point in the middle.
+
 ### compress_codec [string]

 The compress codec of files and the details that supported as the following shown:

@@ -406,6 +412,29 @@ sink {

 ```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  LocalFile {
+    path = "/seatunnel/read/"
+    file_format_type = "csv"
+    skip_header_row_number = 1
+    // file example abcD2024.csv
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+sink {
+  Console {
+  }
+}
+```
+
 ## Changelog

### 2.2.0-beta 2022-09-26

From 427668128471aea7fb34dcca10666bd079fe79ea Mon Sep 17 00:00:00 2001
From: "YOMO.LEE" <75362129@qq.com>
Date: Fri, 25 Oct 2024 18:34:23 +0800
Subject: [PATCH 02/17] [Fix][Connector-V2][ClickHouse] Fix ClickHouse Bug (#7897)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. When the ClickHouse connector is set to multiple parallelism, the job finishes extracting data but cannot stop normally [(#7897)](https://github.com/apache/seatunnel/issues/7897)
2. Added E2E test cases for this issue [(#7897)](https://github.com/apache/seatunnel/issues/7897)
3. Local developers who want to observe **Job Progress Information** in a timely manner need to modify the following configuration; the corresponding entry in the config directory is invalid

```
seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml
```
---
 .../clickhouse/source/ClickhouseSourceReader.java | 13 +++++++++++--
 .../source/ClickhouseSourceSplitEnumerator.java   |  2 ++
 .../src/main/resources/seatunnel.yaml             |  1 +
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java
index 591334d9722..b21519083b7 100644
--- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java
+++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java
@@ -17,6 +17,8 @@
 package org.apache.seatunnel.connectors.seatunnel.clickhouse.source;

+import lombok.extern.slf4j.Slf4j;
+import org.apache.seatunnel.api.source.Boundedness;
 import org.apache.seatunnel.api.source.Collector;
 import org.apache.seatunnel.api.source.SourceReader;
 import org.apache.seatunnel.api.table.type.SeaTunnelRow;
@@ -34,7 +36,7 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Random;
-
+@Slf4j
 public class ClickhouseSourceReader implements SourceReader {
 private final List servers;
@@ -43,6 +45,7 @@ public class ClickhouseSourceReader implements SourceReader
 request;
 private final String sql;
+ private volatile boolean noMoreSplit;
 private final List splits;
@@ -97,6 +100,12 @@
 record -> {
 }
 this.readerContext.signalNoMoreElement();
 this.splits.clear();
+ } else if (noMoreSplit
+ && splits.isEmpty()
+ && Boundedness.BOUNDED.equals(readerContext.getBoundedness())) {
+ log.info("Closed the bounded ClickHouse source");
+ this.readerContext.signalNoMoreElement();
+ this.splits.clear();
+ }
 }
@@ -111,7 +120,7 @@ public void addSplits(List splits) {
 }

 @Override
- public void handleNoMoreSplits() {}
+ public void handleNoMoreSplits() {noMoreSplit = true;}

 @Override
 public void notifyCheckpointComplete(long checkpointId) throws Exception {}

diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java
index c0eb4b6c706..21937db9a81 100644
--- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java
+++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java
@@ -77,6 +77,8 @@ public void registerReader(int subtaskId) {
 if (assigned < 0) {
 assigned = subtaskId;
 context.assignSplit(subtaskId, new ClickhouseSourceSplit());
+ } else {
+ context.signalNoMoreSplits(subtaskId);
 }
 }

diff --git a/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml b/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml
index c9bb71ecc07..8c5a136d1b0 100644
--- a/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml
+++ b/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml
@@ -20,6 +20,7 @@ seatunnel:
 backup-count: 1
 queue-type: blockingqueue
 print-execution-info-interval: 60
+ print-job-metrics-info-interval: 60
 slot-service:
 dynamic-slot: true
 checkpoint:

From 1b8066765f2328f4624fb4137b867566f4d2fa69 Mon Sep 17 00:00:00 2001
From: "YOMO.LEE" <75362129@qq.com>
Date: Fri, 25 Oct 2024 22:33:01 +0800
Subject: [PATCH 03/17] [Fix][Connector-V2][ClickHouse] Fix ClickHouse Bug (#7897)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. When the ClickHouse connector is set to multiple parallelism, the job finishes extracting data but cannot stop normally [(#7897)](https://github.com/apache/seatunnel/issues/7897)
2. Added E2E test cases for this issue [(#7897)](https://github.com/apache/seatunnel/issues/7897)
3. Local developers who want to observe **Job Progress Information** in a timely manner need to modify the following configuration; the corresponding entry in the config directory is invalid

```
seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml
```
---
 .../source/ClickhouseSourceReader.java | 7 ++-
.../seatunnel/clickhouse/ClickhouseIT.java | 7 +++ .../test/resources/clickhouse_to_console.conf | 45 +++++++++++++++++++ 3 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java index b21519083b7..fd3c6300b5d 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java @@ -17,7 +17,6 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.source; -import lombok.extern.slf4j.Slf4j; import org.apache.seatunnel.api.source.Boundedness; import org.apache.seatunnel.api.source.Collector; import org.apache.seatunnel.api.source.SourceReader; @@ -30,12 +29,14 @@ import com.clickhouse.client.ClickHouseNode; import com.clickhouse.client.ClickHouseRequest; import com.clickhouse.client.ClickHouseResponse; +import lombok.extern.slf4j.Slf4j; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Random; + @Slf4j public class ClickhouseSourceReader implements SourceReader { @@ -120,7 +121,9 @@ public void addSplits(List splits) { } @Override - public void handleNoMoreSplits() {noMoreSplit = true;} + public void handleNoMoreSplits() { + noMoreSplit = true; + } @Override public void notifyCheckpointComplete(long checkpointId) throws Exception {} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java index 66ee281740c..3e8b0a090fc 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java @@ -101,6 +101,13 @@ public void testClickhouse(TestContainer container) throws Exception { clearSinkTable(); } + @TestTemplate + public void testSourceParallelism(TestContainer container) throws Exception { + System.out.println("=========多并行度测试==========="); + Container.ExecResult execResult = container.executeJob("/clickhouse_to_console.conf"); + System.out.println(execResult.getExitCode()); + } + @BeforeAll @Override public void startUp() throws Exception { diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf new file mode 100644 index 00000000000..755131276ba --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or 
more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+######
+###### This config file is a demonstration of streaming processing in seatunnel config
+######
+
+env {
+  parallelism = 2
+  job.mode = "BATCH"
+}
+
+source {
+  # This is a example source plugin **only for test and demonstrate the feature source plugin**
+  Clickhouse {
+    host = "clickhouse:8123"
+    database = "default"
+    sql = "select * from source_table"
+    username = "default"
+    password = ""
+    result_table_name = "source_table"
+  }
+  # If you would like to get more information about how to configure seatunnel and see full list of source plugins,
+  # please go to https://seatunnel.apache.org/docs/connector-v2/source/ClickhouseSource
+}
+
+sink {
+  console {
+  }
+  # If you would like to get more information about how to configure seatunnel and see full list of sink plugins,
+  # please go to https://seatunnel.apache.org/docs/connector-v2/sink
+}
\ No newline at end of file

From e64b8a67ddd8ae6730813c66cb2e929e2768d6eb Mon Sep 17 00:00:00 2001
From: "YOMO.LEE" <75362129@qq.com>
Date: Sat, 26 Oct 2024 12:02:28 +0800
Subject: [PATCH 04/17] [Fix][Connector-V2][ClickHouse] Fix ClickHouse Bug (#7897)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. When the ClickHouse connector is set to multiple parallelism, the job finishes extracting data but cannot stop normally [(#7897)](https://github.com/apache/seatunnel/issues/7897)
2. Added E2E test cases for this issue [(#7897)](https://github.com/apache/seatunnel/issues/7897)
3. Local developers who want to observe **Job Progress Information** in a timely manner need to modify the following configuration; the corresponding entry in the config directory is invalid

```
seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml
```
---
 .../connectors/seatunnel/clickhouse/ClickhouseIT.java | 4 ++--
 .../src/test/resources/clickhouse_to_console.conf     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java
index 3e8b0a090fc..e0a09dc56dc 100644
--- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java
+++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java
@@ -103,9 +103,9 @@ public void testClickhouse(TestContainer container) throws Exception {

 @TestTemplate
 public void testSourceParallelism(TestContainer container) throws Exception {
- System.out.println("=========多并行度测试===========");
+
LOG.info("=========多并行度测试==========="); Container.ExecResult execResult = container.executeJob("/clickhouse_to_console.conf"); - System.out.println(execResult.getExitCode()); + Assertions.assertEquals(0, execResult.getExitCode()); } @BeforeAll diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf index 755131276ba..e996be8e4a2 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf @@ -19,7 +19,7 @@ ###### env { - parallelism = 2 + parallelism = 3 job.mode = "BATCH" } From 42e591925056841d1299effa0112d8134d5d3070 Mon Sep 17 00:00:00 2001 From: "YOMO.LEE" <75362129@qq.com> Date: Sat, 26 Oct 2024 14:21:46 +0800 Subject: [PATCH 05/17] [Fix][Doc] Fix LocalFile doc (#7887) Continue to optimize the document about filtering files and add some examples [(#7887)](https://github.com/apache/seatunnel/issues/7887) --- docs/en/connector-v2/source/LocalFile.md | 60 ++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md index 533d7fa91bf..077537f6887 100644 --- a/docs/en/connector-v2/source/LocalFile.md +++ b/docs/en/connector-v2/source/LocalFile.md @@ -256,10 +256,70 @@ Filter pattern, which used for filtering files. The filtering format is similar to wildcard matching file names in Linux. +| Wildcard | Meaning | Example | +|--------------|--------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------| +| * | Match 0 or more characters | f*     Any file starting with f
b*.txt   Any file starting with b, any character in the middle, and ending with. txt | +| [] | Match a single character in parentheses | [abc]*   A file that starts with any one of the characters a, b, or c | +| ? | Match any single character | f?.txt   Any file starting with 'f' followed by a character and ending with '. txt' | +| [!] | Match any single character not in parentheses | [!abc]*   Any file that does not start with abc | +| [a-z] | Match any single character from a to z | [a-z]*   Any file starting with a to z | +| {a,b,c}/a..z | When separated by commas, it represents individual characters
When separated by two dots, represents continuous characters | {a,b,c}*   Files starting with any character from abc
{a..Z}*    Files starting with any character from a to z | + However, it should be noted that unlike Linux wildcard characters, when encountering file suffixes, the middle dot cannot be omitted. For example, `abc20241022.csv`, the normal Linux wildcard `abc*` is sufficient, but here we need to use `abc*.*` , Pay attention to a point in the middle. +File Structure Example: +``` +report.txt +notes.txt +input.csv +abch20241022.csv +abcw20241022.csv +abcx20241022.csv +abcq20241022.csv +abcg20241022.csv +abcv20241022.csv +abcb20241022.csv +old_data.csv +logo.png +script.sh +helpers.sh +``` +Matching Rules Example: + +**Example 1**: *Match all .txt files*,Regular Expression: +``` +*.txt +``` +The result of this example matching is: +``` +report.txt +notes.txt +``` +**Example 2**: *Match all Any file starting with abc*,Regular Expression: +``` +abc*.csv +``` +The result of this example matching is: +``` +abch20241022.csv +abcw20241022.csv +abcx20241022.csv +abcq20241022.csv +abcg20241022.csv +abcv20241022.csv +abcb20241022.csv +``` +**Example 3**: *Match all Any file starting with abc,And the fourth character is either x or g*, the Regular Expression: +``` +abc[x,g]*.csv +``` +The result of this example matching is: +``` +abcx20241022.csv +abcg20241022.csv +``` ### compress_codec [string] The compress codec of files and the details that supported as the following shown: From 2e9162d61e5e7074daee1e3a053a795fad8f2d40 Mon Sep 17 00:00:00 2001 From: "YOMO.LEE" <75362129@qq.com> Date: Sat, 26 Oct 2024 21:44:17 +0800 Subject: [PATCH 06/17] [Fix][Doc] Fix LocalFile doc (#7887) Change to English log [(#7887)](https://github.com/apache/seatunnel/issues/7887) --- .../seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java index e0a09dc56dc..b4093fd4f12 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java @@ -103,7 +103,7 @@ public void testClickhouse(TestContainer container) throws Exception { @TestTemplate public void testSourceParallelism(TestContainer container) throws Exception { - LOG.info("=========多并行度测试==========="); + LOG.info("=========Multi parallelism testing begins==========="); Container.ExecResult execResult = container.executeJob("/clickhouse_to_console.conf"); Assertions.assertEquals(0, execResult.getExitCode()); } From e564e7fe63750639d9778a7bc6e6c64037c3f2bf Mon Sep 17 00:00:00 2001 From: "YOMO.LEE" <75362129@qq.com> Date: Sun, 27 Oct 2024 00:08:14 +0800 Subject: [PATCH 07/17] Revert "[Fix][Doc] Fix LocalFile doc (#7887)" This reverts commit 2e9162d61e5e7074daee1e3a053a795fad8f2d40. 
--- .../seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java index b4093fd4f12..e0a09dc56dc 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java @@ -103,7 +103,7 @@ public void testClickhouse(TestContainer container) throws Exception { @TestTemplate public void testSourceParallelism(TestContainer container) throws Exception { - LOG.info("=========Multi parallelism testing begins==========="); + LOG.info("=========多并行度测试==========="); Container.ExecResult execResult = container.executeJob("/clickhouse_to_console.conf"); Assertions.assertEquals(0, execResult.getExitCode()); } From c5bcdf7262ca7f51e812d75cffcdfb7132686a91 Mon Sep 17 00:00:00 2001 From: "YOMO.LEE" <75362129@qq.com> Date: Sun, 27 Oct 2024 00:08:31 +0800 Subject: [PATCH 08/17] Revert "[Fix][Connector-V2][ClickHouse] Fix ClickHouse Bug (#7897)" This reverts commit e64b8a67ddd8ae6730813c66cb2e929e2768d6eb. --- .../connectors/seatunnel/clickhouse/ClickhouseIT.java | 4 ++-- .../src/test/resources/clickhouse_to_console.conf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java index e0a09dc56dc..3e8b0a090fc 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java @@ -103,9 +103,9 @@ public void testClickhouse(TestContainer container) throws Exception { @TestTemplate public void testSourceParallelism(TestContainer container) throws Exception { - LOG.info("=========多并行度测试==========="); + System.out.println("=========多并行度测试==========="); Container.ExecResult execResult = container.executeJob("/clickhouse_to_console.conf"); - Assertions.assertEquals(0, execResult.getExitCode()); + System.out.println(execResult.getExitCode()); } @BeforeAll diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf index e996be8e4a2..755131276ba 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf @@ -19,7 +19,7 @@ ###### env { - parallelism = 3 + parallelism = 2 job.mode = "BATCH" } From d02a01b5cc5233e2ee962553c758adf3a73ab381 Mon Sep 17 00:00:00 2001 From: "YOMO.LEE" <75362129@qq.com> 
Date: Sun, 27 Oct 2024 00:09:10 +0800 Subject: [PATCH 09/17] Revert "[Fix][Connector-V2][ClickHouse] Fix ClickHouse Bug (#7897)" This reverts commit 1b8066765f2328f4624fb4137b867566f4d2fa69. --- .../source/ClickhouseSourceReader.java | 7 +-- .../seatunnel/clickhouse/ClickhouseIT.java | 7 --- .../test/resources/clickhouse_to_console.conf | 45 ------------------- 3 files changed, 2 insertions(+), 57 deletions(-) delete mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java index fd3c6300b5d..b21519083b7 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.source; +import lombok.extern.slf4j.Slf4j; import org.apache.seatunnel.api.source.Boundedness; import org.apache.seatunnel.api.source.Collector; import org.apache.seatunnel.api.source.SourceReader; @@ -29,14 +30,12 @@ import com.clickhouse.client.ClickHouseNode; import com.clickhouse.client.ClickHouseRequest; import com.clickhouse.client.ClickHouseResponse; -import lombok.extern.slf4j.Slf4j; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Random; - @Slf4j public class ClickhouseSourceReader implements SourceReader { @@ -121,9 +120,7 @@ public void addSplits(List splits) { } @Override - public void handleNoMoreSplits() { - noMoreSplit = true; - } + public void handleNoMoreSplits() {noMoreSplit = true;} @Override public void notifyCheckpointComplete(long checkpointId) throws Exception {} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java index 3e8b0a090fc..66ee281740c 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/ClickhouseIT.java @@ -101,13 +101,6 @@ public void testClickhouse(TestContainer container) throws Exception { clearSinkTable(); } - @TestTemplate - public void testSourceParallelism(TestContainer container) throws Exception { - System.out.println("=========多并行度测试==========="); - Container.ExecResult execResult = container.executeJob("/clickhouse_to_console.conf"); - System.out.println(execResult.getExitCode()); - } - @BeforeAll @Override public void startUp() throws Exception { diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf deleted file mode 100644 
index 755131276ba..00000000000 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-clickhouse-e2e/src/test/resources/clickhouse_to_console.conf +++ /dev/null @@ -1,45 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -###### -###### This config file is a demonstration of streaming processing in seatunnel config -###### - -env { - parallelism = 2 - job.mode = "BATCH" -} - -source { - # This is a example source plugin **only for test and demonstrate the feature source plugin** - Clickhouse { - host = "clickhouse:8123" - database = "default" - sql = "select * from source_table" - username = "default" - password = "" - result_table_name = "source_table" - } - # If you would like to get more information about how to configure seatunnel and see full list of source plugins, - # please go to https://seatunnel.apache.org/docs/connector-v2/source/ClickhouseSource -} - -sink { - console { - } - # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, - # please go to https://seatunnel.apache.org/docs/connector-v2/sink -} \ No newline at end of file From 52ee377ddfc4421282e810f8bd4bda4ecbefd3ef Mon Sep 17 00:00:00 2001 From: "YOMO.LEE" <75362129@qq.com> Date: Sun, 27 Oct 2024 00:09:16 +0800 Subject: [PATCH 10/17] Revert "[Fix][Connector-V2][ClickHouse] Fix ClickHouse Bug (#7897)" This reverts commit 427668128471aea7fb34dcca10666bd079fe79ea. 
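For reference, the shutdown handshake that this revert removes worked roughly as follows (a minimal sketch with illustrative names and simplified signatures, not the real SeaTunnel `SourceReader` API):

```java
import java.util.ArrayList;
import java.util.List;

// Sketch of the bounded-reader stop condition from the diffs above.
public class BoundedReaderSketch {
    // Set once the enumerator has no more splits to hand out.
    private volatile boolean noMoreSplit;
    private final List<String> splits = new ArrayList<>();

    // Enumerator callback: every reader must eventually hear this,
    // including readers that never received a split.
    public void handleNoMoreSplits() {
        noMoreSplit = true;
    }

    // Polling loop: a bounded reader may only signal "no more elements"
    // once it has drained its splits AND been told none are coming;
    // otherwise idle readers spin forever and the batch job never stops,
    // which is the symptom described in #7897.
    public void pollNext(Runnable signalNoMoreElement) {
        if (noMoreSplit && splits.isEmpty()) {
            signalNoMoreElement.run();
        }
    }
}
```

The enumerator side pairs this with a `signalNoMoreSplits(subtaskId)` call for every reader that registers but receives no split, which is what the `else` branch added to `registerReader` above did.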
--- .../clickhouse/source/ClickhouseSourceReader.java | 13 ++----------- .../source/ClickhouseSourceSplitEnumerator.java | 2 -- .../src/main/resources/seatunnel.yaml | 1 - 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java index b21519083b7..591334d9722 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceReader.java @@ -17,8 +17,6 @@ package org.apache.seatunnel.connectors.seatunnel.clickhouse.source; -import lombok.extern.slf4j.Slf4j; -import org.apache.seatunnel.api.source.Boundedness; import org.apache.seatunnel.api.source.Collector; import org.apache.seatunnel.api.source.SourceReader; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -36,7 +34,7 @@ import java.util.Collections; import java.util.List; import java.util.Random; -@Slf4j + public class ClickhouseSourceReader implements SourceReader { private final List servers; @@ -45,7 +43,6 @@ public class ClickhouseSourceReader implements SourceReader request; private final String sql; - private volatile boolean noMoreSplit; private final List splits; @@ -100,12 +97,6 @@ record -> { } this.readerContext.signalNoMoreElement(); this.splits.clear(); - } else if (noMoreSplit - && splits.isEmpty() - && Boundedness.BOUNDED.equals(readerContext.getBoundedness())) { - log.info("Closed the bounded ClickHouse source"); - this.readerContext.signalNoMoreElement(); - this.splits.clear(); } } @@ -120,7 +111,7 @@ public void addSplits(List splits) { } @Override - public void handleNoMoreSplits() {noMoreSplit = true;} + public void handleNoMoreSplits() {} @Override public void notifyCheckpointComplete(long checkpointId) throws Exception {} diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java index 21937db9a81..c0eb4b6c706 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/source/ClickhouseSourceSplitEnumerator.java @@ -77,8 +77,6 @@ public void registerReader(int subtaskId) { if (assigned < 0) { assigned = subtaskId; context.assignSplit(subtaskId, new ClickhouseSourceSplit()); - } else { - context.signalNoMoreSplits(subtaskId); } } diff --git a/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml b/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml index 8c5a136d1b0..c9bb71ecc07 100644 --- a/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml +++ b/seatunnel-engine/seatunnel-engine-common/src/main/resources/seatunnel.yaml @@ -20,7 +20,6 @@ seatunnel: backup-count: 1 queue-type: blockingqueue print-execution-info-interval: 60 - 
print-job-metrics-info-interval: 60
 slot-service:
 dynamic-slot: true
 checkpoint:

From 0062ba4ece2da3dafec3f465abdc2fc66699612b Mon Sep 17 00:00:00 2001
From: "YOMO.LEE" <75362129@qq.com>
Date: Mon, 28 Oct 2024 17:10:26 +0800
Subject: [PATCH 11/17] [Fix][Doc] Fix LocalFile Doc

Optimize the description of the regex-based file filtering

---
 docs/en/connector-v2/source/LocalFile.md | 72 +++++++++---------------
 1 file changed, 27 insertions(+), 45 deletions(-)

diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md
index 077537f6887..f16cf1cf545 100644
--- a/docs/en/connector-v2/source/LocalFile.md
+++ b/docs/en/connector-v2/source/LocalFile.md
@@ -254,72 +254,54 @@ Specifies Whether to process data using the tag attribute format.

 Filter pattern, which used for filtering files.

-The filtering format is similar to wildcard matching file names in Linux.
-
-| Wildcard | Meaning | Example |
-|--------------|--------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------|
-| * | Match 0 or more characters | f*     Any file starting with f
b*.txt   Any file starting with b, any character in the middle, and ending with. txt | -| [] | Match a single character in parentheses | [abc]*   A file that starts with any one of the characters a, b, or c | -| ? | Match any single character | f?.txt   Any file starting with 'f' followed by a character and ending with '. txt' | -| [!] | Match any single character not in parentheses | [!abc]*   Any file that does not start with abc | -| [a-z] | Match any single character from a to z | [a-z]*   Any file starting with a to z | -| {a,b,c}/a..z | When separated by commas, it represents individual characters
When separated by two dots, represents continuous characters | {a,b,c}*   Files starting with any character from abc
{a..Z}*    Files starting with any character from a to z | - -However, it should be noted that unlike Linux wildcard characters, when encountering file suffixes, the middle dot cannot be omitted. - -For example, `abc20241022.csv`, the normal Linux wildcard `abc*` is sufficient, but here we need to use `abc*.*` , Pay attention to a point in the middle. +The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression. learn it File Structure Example: ``` -report.txt -notes.txt -input.csv -abch20241022.csv -abcw20241022.csv -abcx20241022.csv -abcq20241022.csv -abcg20241022.csv -abcv20241022.csv -abcb20241022.csv -old_data.csv -logo.png -script.sh -helpers.sh +/data/seatunnel/20241001/report.txt +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +/data/seatunnel/20241005/old_data.csv +/data/seatunnel/20241012/logo.png ``` Matching Rules Example: **Example 1**: *Match all .txt files*,Regular Expression: ``` -*.txt +/data/seatunnel/202410\d*/.*.txt ``` The result of this example matching is: ``` -report.txt -notes.txt +/data/seatunnel/20241001/report.txt ``` -**Example 2**: *Match all Any file starting with abc*,Regular Expression: +**Example 2**: *Match all file starting with abc*,Regular Expression: ``` -abc*.csv +/data/seatunnel/202410\d*/abc.* ``` The result of this example matching is: ``` -abch20241022.csv -abcw20241022.csv -abcx20241022.csv -abcq20241022.csv -abcg20241022.csv -abcv20241022.csv -abcb20241022.csv +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv ``` -**Example 3**: *Match all Any file starting with abc,And the fourth character is either x or g*, the Regular Expression: +**Example 3**: *Match all file starting with abc,And the fourth character is either x or g*, the Regular Expression: ``` -abc[x,g]*.csv +/data/seatunnel/20241002/abc[x,g].* ``` The result of this example matching is: ``` -abcx20241022.csv -abcg20241022.csv +/data/seatunnel/20241002/abcg202410.csv +``` +**Example 4**: *Match third level folders starting with 202410 and files ending with .csv*, the Regular Expression: +``` +/data/seatunnel/202410\d*/.*.csv ``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +/data/seatunnel/20241005/old_data.csv +``` + ### compress_codec [string] The compress codec of files and the details that supported as the following shown: @@ -482,7 +464,7 @@ env { source { LocalFile { - path = "/seatunnel/read/" + path = "/data/seatunnel/" file_format_type = "csv" skip_header_row_number = 1 // file example abcD2024.csv From eae7b144ec42488117c42d5e75a833c44633de74 Mon Sep 17 00:00:00 2001 From: "YOMO.LEE" <75362129@qq.com> Date: Tue, 29 Oct 2024 10:53:11 +0800 Subject: [PATCH 12/17] [Fix][DOC] LocalFile doc optimize Optimize document structure --- docs/en/connector-v2/source/LocalFile.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md index f16cf1cf545..379f4ebffb2 100644 --- a/docs/en/connector-v2/source/LocalFile.md +++ b/docs/en/connector-v2/source/LocalFile.md @@ -254,9 +254,7 @@ Specifies Whether to process data using the tag attribute format. Filter pattern, which used for filtering files. -The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression. 
learn it - -File Structure Example: +The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression. ``` /data/seatunnel/20241001/report.txt /data/seatunnel/20241007/abch202410.csv From d542c11270ab02517fdac0ed080df31a7271966e Mon Sep 17 00:00:00 2001 From: "YOMO.LEE" <75362129@qq.com> Date: Tue, 29 Oct 2024 11:37:26 +0800 Subject: [PATCH 13/17] [Fix][DOC] LocalFile doc optimize Optimize document structure --- docs/en/connector-v2/source/LocalFile.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md index 379f4ebffb2..047f219aa7a 100644 --- a/docs/en/connector-v2/source/LocalFile.md +++ b/docs/en/connector-v2/source/LocalFile.md @@ -255,6 +255,9 @@ Specifies Whether to process data using the tag attribute format. Filter pattern, which used for filtering files. The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression. +There are some examples. + +File Structure Example: ``` /data/seatunnel/20241001/report.txt /data/seatunnel/20241007/abch202410.csv From 867a840cfec4ac88373587d5ed8d7782361e2047 Mon Sep 17 00:00:00 2001 From: "YOMO.LEE" <75362129@qq.com> Date: Tue, 29 Oct 2024 15:42:01 +0800 Subject: [PATCH 14/17] [Fix][DOC] Additional explanation for the file_filter_pattern parameter Please provide a description of all connectors that support the file_filter_pattern parameter --- docs/en/connector-v2/source/CosFile.md | 124 ++++++++++++++++---- docs/en/connector-v2/source/HdfsFile.md | 79 ++++++++++++- docs/en/connector-v2/source/LocalFile.md | 55 ++++----- docs/en/connector-v2/source/OssFile.md | 76 ++++++++++++ docs/en/connector-v2/source/OssJindoFile.md | 124 ++++++++++++++++---- 5 files changed, 382 insertions(+), 76 deletions(-) diff --git a/docs/en/connector-v2/source/CosFile.md b/docs/en/connector-v2/source/CosFile.md index 702439c3062..4b0675b9893 100644 --- a/docs/en/connector-v2/source/CosFile.md +++ b/docs/en/connector-v2/source/CosFile.md @@ -45,30 +45,30 @@ To use this connector you need put hadoop-cos-{hadoop.version}-{version}.jar and ## Options -| name | type | required | default value | -|---------------------------|---------|----------|---------------------| -| path | string | yes | - | -| file_format_type | string | yes | - | -| bucket | string | yes | - | -| secret_id | string | yes | - | -| secret_key | string | yes | - | -| region | string | yes | - | -| read_columns | list | yes | - | -| delimiter/field_delimiter | string | no | \001 | -| parse_partition_from_path | boolean | no | true | -| skip_header_row_number | long | no | 0 | -| date_format | string | no | yyyy-MM-dd | -| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | -| time_format | string | no | HH:mm:ss | -| schema | config | no | - | -| sheet_name | string | no | - | -| xml_row_tag | string | no | - | -| xml_use_attr_format | boolean | no | - | -| file_filter_pattern | string | no | - | -| compress_codec | string | no | none | -| archive_compress_codec | string | no | none | -| encoding | string | no | UTF-8 | -| common-options | | no | - | +| name | type | required | default value | +|---------------------------|---------|----------|------------------------------------------------------------| +| path | string | yes | - | +| file_format_type | string | yes | - | +| bucket | string | yes | - | +| secret_id | string | yes | - | +| secret_key | string | yes | - | +| region | string | 
yes | - | +| read_columns | list | yes | - | +| delimiter/field_delimiter | string | no | \001 | +| parse_partition_from_path | boolean | no | true | +| skip_header_row_number | long | no | 0 | +| date_format | string | no | yyyy-MM-dd | +| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | +| time_format | string | no | HH:mm:ss | +| schema | config | no | - | +| sheet_name | string | no | - | +| xml_row_tag | string | no | - | +| xml_use_attr_format | boolean | no | - | +| file_filter_pattern | string | no | `*.txt` means you only need read the files end with `.txt` | +| compress_codec | string | no | none | +| archive_compress_codec | string | no | none | +| encoding | string | no | UTF-8 | +| common-options | | no | - | ### path [string] @@ -275,6 +275,55 @@ Specifies Whether to process data using the tag attribute format. Filter pattern, which used for filtering files. +The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression. +There are some examples. + +File Structure Example: +``` +/data/seatunnel/20241001/report.txt +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +/data/seatunnel/20241005/old_data.csv +/data/seatunnel/20241012/logo.png +``` +Matching Rules Example: + +**Example 1**: *Match all .txt files*,Regular Expression: +``` +/data/seatunnel/20241001/.*\.txt +``` +The result of this example matching is: +``` +/data/seatunnel/20241001/report.txt +``` +**Example 2**: *Match all file starting with abc*,Regular Expression: +``` +/data/seatunnel/20241002/abc.* +``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +``` +**Example 3**: *Match all file starting with abc,And the fourth character is either h or g*, the Regular Expression: +``` +/data/seatunnel/20241007/abc[h,g].* +``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +``` +**Example 4**: *Match third level folders starting with 202410 and files ending with .csv*, the Regular Expression: +``` +/data/seatunnel/202410\d*/.*\.csv +``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +/data/seatunnel/20241005/old_data.csv +``` + ### compress_codec [string] The compress codec of files and the details that supported as the following shown: @@ -372,6 +421,33 @@ sink { ``` +### Filter File + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + CosFile { + bucket = "cosn://seatunnel-test-1259587829" + secret_id = "xxxxxxxxxxxxxxxxxxx" + secret_key = "xxxxxxxxxxxxxxxxxxx" + region = "ap-chengdu" + path = "/seatunnel/read/binary/" + file_format_type = "binary" + // file example abcD2024.csv + file_filter_pattern = "abc[DX]*.*" + } +} + +sink { + Console { + } +} +``` + ## Changelog ### next version diff --git a/docs/en/connector-v2/source/HdfsFile.md b/docs/en/connector-v2/source/HdfsFile.md index 7413c0428b8..300d96f0f16 100644 --- a/docs/en/connector-v2/source/HdfsFile.md +++ b/docs/en/connector-v2/source/HdfsFile.md @@ -41,7 +41,7 @@ Read data from hdfs file system. 
## Source Options -| Name | Type | Required | Default | Description | +| Name | Type | Required | Default | Description | |---------------------------|---------|----------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | path | string | yes | - | The source file path. | | file_format_type | string | yes | - | We supported as the following file types:`text` `csv` `parquet` `orc` `json` `excel` `xml` `binary`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. | @@ -62,6 +62,7 @@ Read data from hdfs file system. | sheet_name | string | no | - | Reader the sheet of the workbook,Only used when file_format is excel. | | xml_row_tag | string | no | - | Specifies the tag name of the data rows within the XML file, only used when file_format is xml. | | xml_use_attr_format | boolean | no | - | Specifies whether to process data using the tag attribute format, only used when file_format is xml. | +| file_filter_pattern | string | no | | `*.txt` means you only need read the files end with `.txt` | | compress_codec | string | no | none | The compress codec of files | | archive_compress_codec | string | no | none | | encoding | string | no | UTF-8 | | @@ -71,6 +72,59 @@ Read data from hdfs file system. **delimiter** parameter will deprecate after version 2.3.5, please use **field_delimiter** instead. +### file_filter_pattern [string] + +Filter pattern, which used for filtering files. + +The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression. +There are some examples. 
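+Before the examples, note that such a pattern can be sanity-checked with plain `java.util.regex` before it goes into a job config. The snippet below is only an illustration (it is not connector code, and it assumes the pattern is matched against the full file path, as in the examples that follow):
+
+```java
+import java.util.regex.Pattern;
+
+public class FilterPatternCheck {
+    public static void main(String[] args) {
+        // "\." in the doc examples becomes "\\." inside a Java string literal.
+        Pattern pattern = Pattern.compile("/data/seatunnel/20241001/.*\\.txt");
+        System.out.println(pattern.matcher("/data/seatunnel/20241001/report.txt").matches()); // true
+        System.out.println(pattern.matcher("/data/seatunnel/20241012/logo.png").matches());   // false
+    }
+}
+```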
+ +File Structure Example: +``` +/data/seatunnel/20241001/report.txt +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +/data/seatunnel/20241005/old_data.csv +/data/seatunnel/20241012/logo.png +``` +Matching Rules Example: + +**Example 1**: *Match all .txt files*,Regular Expression: +``` +/data/seatunnel/20241001/.*\.txt +``` +The result of this example matching is: +``` +/data/seatunnel/20241001/report.txt +``` +**Example 2**: *Match all file starting with abc*,Regular Expression: +``` +/data/seatunnel/20241002/abc.* +``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +``` +**Example 3**: *Match all file starting with abc,And the fourth character is either h or g*, the Regular Expression: +``` +/data/seatunnel/20241007/abc[h,g].* +``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +``` +**Example 4**: *Match third level folders starting with 202410 and files ending with .csv*, the Regular Expression: +``` +/data/seatunnel/202410\d*/.*\.csv +``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +/data/seatunnel/20241005/old_data.csv +``` + ### compress_codec [string] The compress codec of files and the details that supported as the following shown: @@ -146,3 +200,26 @@ sink { } ``` +### Filter File + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + HdfsFile { + path = "/apps/hive/demo/student" + file_format_type = "json" + fs.defaultFS = "hdfs://namenode001" + // file example abcD2024.csv + file_filter_pattern = "abc[DX]*.*" + } +} + +sink { + Console { + } +} +``` diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md index 047f219aa7a..fb0e8de9139 100644 --- a/docs/en/connector-v2/source/LocalFile.md +++ b/docs/en/connector-v2/source/LocalFile.md @@ -43,27 +43,27 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you ## Options -| name | type | required | default value | -|---------------------------|---------|----------|--------------------------------------| -| path | string | yes | - | -| file_format_type | string | yes | - | -| read_columns | list | no | - | -| delimiter/field_delimiter | string | no | \001 | -| parse_partition_from_path | boolean | no | true | -| date_format | string | no | yyyy-MM-dd | -| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | -| time_format | string | no | HH:mm:ss | -| skip_header_row_number | long | no | 0 | -| schema | config | no | - | -| sheet_name | string | no | - | -| xml_row_tag | string | no | - | -| xml_use_attr_format | boolean | no | - | -| file_filter_pattern | string | no | - | -| compress_codec | string | no | none | -| archive_compress_codec | string | no | none | -| encoding | string | no | UTF-8 | -| common-options | | no | - | -| tables_configs | list | no | used to define a multiple table task | +| name | type | required | default value | +|---------------------------|---------|----------|------------------------------------------------------------| +| path | string | yes | - | +| file_format_type | string | yes | - | +| read_columns | list | no | - | +| delimiter/field_delimiter | string | no | \001 | +| parse_partition_from_path | boolean | no | true | +| date_format | string | no | yyyy-MM-dd | +| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | +| time_format | string | no | HH:mm:ss | +| 
skip_header_row_number | long | no | 0 | +| schema | config | no | - | +| sheet_name | string | no | - | +| xml_row_tag | string | no | - | +| xml_use_attr_format | boolean | no | - | +| file_filter_pattern | string | no | `*.txt` means you only need read the files end with `.txt` | +| compress_codec | string | no | none | +| archive_compress_codec | string | no | none | +| encoding | string | no | UTF-8 | +| common-options | | no | - | +| tables_configs | list | no | used to define a multiple table task | ### path [string] @@ -269,7 +269,7 @@ Matching Rules Example: **Example 1**: *Match all .txt files*,Regular Expression: ``` -/data/seatunnel/202410\d*/.*.txt +/data/seatunnel/20241001/.*\.txt ``` The result of this example matching is: ``` @@ -277,24 +277,24 @@ The result of this example matching is: ``` **Example 2**: *Match all file starting with abc*,Regular Expression: ``` -/data/seatunnel/202410\d*/abc.* +/data/seatunnel/20241002/abc.* ``` The result of this example matching is: ``` /data/seatunnel/20241007/abch202410.csv /data/seatunnel/20241002/abcg202410.csv ``` -**Example 3**: *Match all file starting with abc,And the fourth character is either x or g*, the Regular Expression: +**Example 3**: *Match all file starting with abc,And the fourth character is either h or g*, the Regular Expression: ``` -/data/seatunnel/20241002/abc[x,g].* +/data/seatunnel/20241007/abc[h,g].* ``` The result of this example matching is: ``` -/data/seatunnel/20241002/abcg202410.csv +/data/seatunnel/20241007/abch202410.csv ``` **Example 4**: *Match third level folders starting with 202410 and files ending with .csv*, the Regular Expression: ``` -/data/seatunnel/202410\d*/.*.csv +/data/seatunnel/202410\d*/.*\.csv ``` The result of this example matching is: ``` @@ -472,6 +472,7 @@ source { file_filter_pattern = "abc[DX]*.*" } } + sink { Console { } diff --git a/docs/en/connector-v2/source/OssFile.md b/docs/en/connector-v2/source/OssFile.md index d5326cb86a4..01bbde41c1e 100644 --- a/docs/en/connector-v2/source/OssFile.md +++ b/docs/en/connector-v2/source/OssFile.md @@ -233,6 +233,55 @@ The encoding of the file to read. This param will be parsed by `Charset.forName( Filter pattern, which used for filtering files. +The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression. +There are some examples. 
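+One detail worth checking in these patterns is the escaped dot: in a regular expression an unescaped `.` matches any character, so `.*.csv` is looser than it looks, while `.*\.csv` requires a literal dot before the suffix. A small illustration (plain Java, not connector code; the `\d{8}` date folder is just a compact alternative to the `202410\d*` style used in the examples):
+
+```java
+public class DotEscapeCheck {
+    public static void main(String[] args) {
+        // Strict form: "\\." pins the literal dot before the suffix.
+        System.out.println("/data/seatunnel/20241005/old_data.csv"
+                .matches("/data/seatunnel/\\d{8}/.*\\.csv")); // true
+        // Loose form: the unescaped dot means "any character", so a file
+        // name with no dot at all can still slip through.
+        System.out.println("/data/seatunnel/20241005/old_dataXcsv"
+                .matches("/data/seatunnel/\\d{8}/.*.csv"));   // true
+        System.out.println("/data/seatunnel/20241005/old_dataXcsv"
+                .matches("/data/seatunnel/\\d{8}/.*\\.csv")); // false
+    }
+}
+```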
+ +File Structure Example: +``` +/data/seatunnel/20241001/report.txt +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +/data/seatunnel/20241005/old_data.csv +/data/seatunnel/20241012/logo.png +``` +Matching Rules Example: + +**Example 1**: *Match all .txt files*,Regular Expression: +``` +/data/seatunnel/20241001/.*\.txt +``` +The result of this example matching is: +``` +/data/seatunnel/20241001/report.txt +``` +**Example 2**: *Match all file starting with abc*,Regular Expression: +``` +/data/seatunnel/20241002/abc.* +``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +``` +**Example 3**: *Match all file starting with abc,And the fourth character is either h or g*, the Regular Expression: +``` +/data/seatunnel/20241007/abc[h,g].* +``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +``` +**Example 4**: *Match third level folders starting with 202410 and files ending with .csv*, the Regular Expression: +``` +/data/seatunnel/202410\d*/.*\.csv +``` +The result of this example matching is: +``` +/data/seatunnel/20241007/abch202410.csv +/data/seatunnel/20241002/abcg202410.csv +/data/seatunnel/20241005/old_data.csv +``` + ### schema [config] Only need to be configured when the file_format_type are text, json, excel, xml or csv ( Or other format we can't read the schema from metadata). @@ -474,6 +523,33 @@ sink { } ``` +### Filter File + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + OssFile { + path = "/seatunnel/orc" + bucket = "oss://tyrantlucifer-image-bed" + access_key = "xxxxxxxxxxxxxxxxx" + access_secret = "xxxxxxxxxxxxxxxxxxxxxx" + endpoint = "oss-cn-beijing.aliyuncs.com" + file_format_type = "orc" + // file example abcD2024.csv + file_filter_pattern = "abc[DX]*.*" + } +} + +sink { + Console { + } +} +``` + ## Changelog ### 2.2.0-beta 2022-09-26 diff --git a/docs/en/connector-v2/source/OssJindoFile.md b/docs/en/connector-v2/source/OssJindoFile.md index d5bd6d14fa3..97e6981aea0 100644 --- a/docs/en/connector-v2/source/OssJindoFile.md +++ b/docs/en/connector-v2/source/OssJindoFile.md @@ -49,30 +49,30 @@ It only supports hadoop version **2.9.X+**. 
## Options -| name | type | required | default value | -|---------------------------|---------|----------|---------------------| -| path | string | yes | - | -| file_format_type | string | yes | - | -| bucket | string | yes | - | -| access_key | string | yes | - | -| access_secret | string | yes | - | -| endpoint | string | yes | - | -| read_columns | list | no | - | -| delimiter/field_delimiter | string | no | \001 | -| parse_partition_from_path | boolean | no | true | -| date_format | string | no | yyyy-MM-dd | -| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | -| time_format | string | no | HH:mm:ss | -| skip_header_row_number | long | no | 0 | -| schema | config | no | - | -| sheet_name | string | no | - | -| xml_row_tag | string | no | - | -| xml_use_attr_format | boolean | no | - | -| file_filter_pattern | string | no | - | -| compress_codec | string | no | none | -| archive_compress_codec | string | no | none | -| encoding | string | no | UTF-8 | -| common-options | | no | - | +| name | type | required | default value | +|---------------------------|---------|----------|------------------------------------------------------------| +| path | string | yes | - | +| file_format_type | string | yes | - | +| bucket | string | yes | - | +| access_key | string | yes | - | +| access_secret | string | yes | - | +| endpoint | string | yes | - | +| read_columns | list | no | - | +| delimiter/field_delimiter | string | no | \001 | +| parse_partition_from_path | boolean | no | true | +| date_format | string | no | yyyy-MM-dd | +| datetime_format | string | no | yyyy-MM-dd HH:mm:ss | +| time_format | string | no | HH:mm:ss | +| skip_header_row_number | long | no | 0 | +| schema | config | no | - | +| sheet_name | string | no | - | +| xml_row_tag | string | no | - | +| xml_use_attr_format | boolean | no | - | +| file_filter_pattern | string | no | `*.txt` means you only need read the files end with `.txt` | +| compress_codec | string | no | none | +| archive_compress_codec | string | no | none | +| encoding | string | no | UTF-8 | +| common-options | | no | - | ### path [string] @@ -267,6 +267,55 @@ Reader the sheet of the workbook. Filter pattern, which used for filtering files. +The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression. +There are some examples. 
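+Earlier revisions of these docs described `file_filter_pattern` in terms of Linux-style wildcards, so it may help to contrast glob and regex semantics; these docs now specify the regex form. A quick illustration with `java.nio.file.PathMatcher` (standalone code, not part of the connector):
+
+```java
+import java.nio.file.FileSystems;
+import java.nio.file.Path;
+import java.nio.file.PathMatcher;
+
+public class GlobVsRegex {
+    public static void main(String[] args) {
+        Path path = Path.of("/data/seatunnel/20241007/abch202410.csv");
+        // Glob: '*' stays inside one path segment and '.' is a literal.
+        PathMatcher glob = FileSystems.getDefault()
+                .getPathMatcher("glob:/data/seatunnel/*/abc*.csv");
+        // Regex: '.' is a metacharacter, so the dot must be escaped.
+        PathMatcher regex = FileSystems.getDefault()
+                .getPathMatcher("regex:/data/seatunnel/\\d+/abc.*\\.csv");
+        System.out.println(glob.matches(path));  // true
+        System.out.println(regex.matches(path)); // true
+    }
+}
+```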
+
+File Structure Example:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+Matching Rules Example:
+
+**Example 1**: *Match all .txt files*. Regular expression:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+This example matches:
+```
+/data/seatunnel/20241001/report.txt
+```
+**Example 2**: *Match all files starting with abc*. Regular expression:
+```
+/data/seatunnel/20241002/abc.*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**Example 3**: *Match all files starting with abc whose fourth character is h or g*. Regular expression:
+```
+/data/seatunnel/20241007/abc[hg].*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**Example 4**: *Match files ending with .csv in third-level folders starting with 202410*. Regular expression:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```
+
### compress_codec [string]

The compress codec of files and the details that supported as the following shown:
@@ -364,6 +413,33 @@ sink {
}
```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  OssJindoFile {
+    bucket = "oss://tyrantlucifer-image-bed"
+    access_key = "xxxxxxxxxxxxxxxxx"
+    access_secret = "xxxxxxxxxxxxxxxxxxxxxx"
+    endpoint = "oss-cn-beijing.aliyuncs.com"
+    path = "/seatunnel/read/binary/"
+    file_format_type = "binary"
+    // file example abcD2024.csv
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+
+sink {
+  Console {
+  }
+}
+```
+
## Changelog

### next version

From c1e8f09afed0dc662be2a3f2b4c70f51181c8b6c Mon Sep 17 00:00:00 2001
From: "YOMO.LEE" <75362129@qq.com>
Date: Tue, 29 Oct 2024 15:42:01 +0800
Subject: [PATCH 15/17] [Fix][DOC] Additional explanation for the file_filter_pattern parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added file_filter_pattern descriptions for the following file connectors:
CosFile (en), OssFile (en), OssJindoFile (en), HdfsFile (en)
---
 docs/en/connector-v2/source/CosFile.md      | 124 ++++++++++++++++----
 docs/en/connector-v2/source/HdfsFile.md     |  79 ++++++++++++-
 docs/en/connector-v2/source/LocalFile.md    |  55 ++++-----
 docs/en/connector-v2/source/OssFile.md      |  76 ++++++++++++
 docs/en/connector-v2/source/OssJindoFile.md | 124 ++++++++++++++++----
 5 files changed, 382 insertions(+), 76 deletions(-)

diff --git a/docs/en/connector-v2/source/CosFile.md b/docs/en/connector-v2/source/CosFile.md
index 702439c3062..4b0675b9893 100644
--- a/docs/en/connector-v2/source/CosFile.md
+++ b/docs/en/connector-v2/source/CosFile.md
@@ -45,30 +45,30 @@ To use this connector you need put hadoop-cos-{hadoop.version}-{version}.jar and

## Options

-| name | type | required | default value |
-|---------------------------|---------|----------|---------------------|
-| path | string | yes | - |
-| file_format_type | string | yes | - |
-| bucket | string | yes | - |
-| secret_id | string | yes | - |
-| secret_key | string | yes | - |
-| region | string | yes | - |
-| read_columns | list | yes | - |
-| delimiter/field_delimiter | string | no | \001 |
-| parse_partition_from_path | boolean | no | true |
-| skip_header_row_number | long | no | 0 |
-| date_format | string | no | yyyy-MM-dd |
-| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
-| time_format | string | no | HH:mm:ss |
-| schema | config | no | - |
-| sheet_name | string | no | - |
-| xml_row_tag | string | no | - |
-| xml_use_attr_format | boolean | no | - |
-| file_filter_pattern | string | no | - |
-| compress_codec | string | no | none |
-| archive_compress_codec | string | no | none |
-| encoding | string | no | UTF-8 |
-| common-options | | no | - |
+| name | type | required | default value |
+|---------------------------|---------|----------|------------------------------------------------------------|
+| path | string | yes | - |
+| file_format_type | string | yes | - |
+| bucket | string | yes | - |
+| secret_id | string | yes | - |
+| secret_key | string | yes | - |
+| region | string | yes | - |
+| read_columns | list | yes | - |
+| delimiter/field_delimiter | string | no | \001 |
+| parse_partition_from_path | boolean | no | true |
+| skip_header_row_number | long | no | 0 |
+| date_format | string | no | yyyy-MM-dd |
+| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
+| time_format | string | no | HH:mm:ss |
+| schema | config | no | - |
+| sheet_name | string | no | - |
+| xml_row_tag | string | no | - |
+| xml_use_attr_format | boolean | no | - |
+| file_filter_pattern | string | no | `*.txt` means read only the files ending with `.txt` |
+| compress_codec | string | no | none |
+| archive_compress_codec | string | no | none |
+| encoding | string | no | UTF-8 |
+| common-options | | no | - |

### path [string]

@@ -275,6 +275,55 @@ Specifies Whether to process data using the tag attribute format.

Filter pattern, which is used for filtering files.

+The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression.
+Here are some examples.
+
+File Structure Example:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+Matching Rules Example:
+
+**Example 1**: *Match all .txt files*. Regular expression:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+This example matches:
+```
+/data/seatunnel/20241001/report.txt
+```
+**Example 2**: *Match all files starting with abc*. Regular expression:
+```
+/data/seatunnel/20241002/abc.*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**Example 3**: *Match all files starting with abc whose fourth character is h or g*. Regular expression:
+```
+/data/seatunnel/20241007/abc[hg].*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**Example 4**: *Match files ending with .csv in third-level folders starting with 202410*. Regular expression:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```
+
### compress_codec [string]

The compress codec of files and the details that supported as the following shown:
@@ -372,6 +421,33 @@ sink {
}
```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  CosFile {
+    bucket = "cosn://seatunnel-test-1259587829"
+    secret_id = "xxxxxxxxxxxxxxxxxxx"
+    secret_key = "xxxxxxxxxxxxxxxxxxx"
+    region = "ap-chengdu"
+    path = "/seatunnel/read/binary/"
+    file_format_type = "binary"
+    // file example abcD2024.csv
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+
+sink {
+  Console {
+  }
+}
+```
+
## Changelog

### next version

diff --git a/docs/en/connector-v2/source/HdfsFile.md b/docs/en/connector-v2/source/HdfsFile.md
index 7413c0428b8..300d96f0f16 100644
--- a/docs/en/connector-v2/source/HdfsFile.md
+++ b/docs/en/connector-v2/source/HdfsFile.md
@@ -41,7 +41,7 @@ Read data from hdfs file system.

## Source Options

-| Name | Type | Required | Default | Description |
+| Name | Type | Required | Default | Description |
|---------------------------|---------|----------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| path | string | yes | - | The source file path. |
| file_format_type | string | yes | - | We supported as the following file types:`text` `csv` `parquet` `orc` `json` `excel` `xml` `binary`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. |
@@ -62,6 +62,7 @@ Read data from hdfs file system.
| sheet_name | string | no | - | Reader the sheet of the workbook,Only used when file_format is excel. |
| xml_row_tag | string | no | - | Specifies the tag name of the data rows within the XML file, only used when file_format is xml. |
| xml_use_attr_format | boolean | no | - | Specifies whether to process data using the tag attribute format, only used when file_format is xml. |
+| file_filter_pattern | string | no | | `*.txt` means read only the files ending with `.txt` |
| compress_codec | string | no | none | The compress codec of files |
| archive_compress_codec | string | no | none |
| encoding | string | no | UTF-8 | |
@@ -71,6 +72,59 @@ Read data from hdfs file system.

**delimiter** parameter will deprecate after version 2.3.5, please use **field_delimiter** instead.

+### file_filter_pattern [string]
+
+Filter pattern, which is used for filtering files.
+
+The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression.
+Here are some examples.
+
+File Structure Example:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+Matching Rules Example:
+
+**Example 1**: *Match all .txt files*. Regular expression:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+This example matches:
+```
+/data/seatunnel/20241001/report.txt
+```
+**Example 2**: *Match all files starting with abc*. Regular expression:
+```
+/data/seatunnel/20241002/abc.*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**Example 3**: *Match all files starting with abc whose fourth character is h or g*. Regular expression:
+```
+/data/seatunnel/20241007/abc[hg].*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**Example 4**: *Match files ending with .csv in third-level folders starting with 202410*. Regular expression:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```
+
### compress_codec [string]

The compress codec of files and the details that supported as the following shown:
@@ -146,3 +200,26 @@ sink {
}
```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  HdfsFile {
+    path = "/apps/hive/demo/student"
+    file_format_type = "json"
+    fs.defaultFS = "hdfs://namenode001"
+    // file example abcD2024.csv
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+
+sink {
+  Console {
+  }
+}
+```
diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md
index 047f219aa7a..fb0e8de9139 100644
--- a/docs/en/connector-v2/source/LocalFile.md
+++ b/docs/en/connector-v2/source/LocalFile.md
@@ -43,27 +43,27 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you

## Options

-| name | type | required | default value |
-|---------------------------|---------|----------|--------------------------------------|
-| path | string | yes | - |
-| file_format_type | string | yes | - |
-| read_columns | list | no | - |
-| delimiter/field_delimiter | string | no | \001 |
-| parse_partition_from_path | boolean | no | true |
-| date_format | string | no | yyyy-MM-dd |
-| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
-| time_format | string | no | HH:mm:ss |
-| skip_header_row_number | long | no | 0 |
-| schema | config | no | - |
-| sheet_name | string | no | - |
-| xml_row_tag | string | no | - |
-| xml_use_attr_format | boolean | no | - |
-| file_filter_pattern | string | no | - |
-| compress_codec | string | no | none |
-| archive_compress_codec | string | no | none |
-| encoding | string | no | UTF-8 |
-| common-options | | no | - |
-| tables_configs | list | no | used to define a multiple table task |
+| name | type | required | default value |
+|---------------------------|---------|----------|------------------------------------------------------------|
+| path | string | yes | - |
+| file_format_type | string | yes | - |
+| read_columns | list | no | - |
+| delimiter/field_delimiter | string | no | \001 |
+| parse_partition_from_path | boolean | no | true |
+| date_format | string | no | yyyy-MM-dd |
+| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
+| time_format | string | no | HH:mm:ss |
+| skip_header_row_number | long | no | 0 |
+| schema | config | no | - |
+| sheet_name | string | no | - |
+| xml_row_tag | string | no | - |
+| xml_use_attr_format | boolean | no | - |
+| file_filter_pattern | string | no | `*.txt` means read only the files ending with `.txt` |
+| compress_codec | string | no | none |
+| archive_compress_codec | string | no | none |
+| encoding | string | no | UTF-8 |
+| common-options | | no | - |
+| tables_configs | list | no | used to define a multiple table task |

### path [string]

@@ -269,7 +269,7 @@ Matching Rules Example:

**Example 1**: *Match all .txt files*. Regular expression:
```
-/data/seatunnel/202410\d*/.*.txt
+/data/seatunnel/20241001/.*\.txt
```
This example matches:
```
/data/seatunnel/20241001/report.txt
```
**Example 2**: *Match all files starting with abc*. Regular expression:
```
-/data/seatunnel/202410\d*/abc.*
+/data/seatunnel/20241002/abc.*
```
This example matches:
```
/data/seatunnel/20241007/abch202410.csv
/data/seatunnel/20241002/abcg202410.csv
```
-**Example 3**: *Match all file starting with abc,And the fourth character is either x or g*, the Regular Expression:
+**Example 3**: *Match all files starting with abc whose fourth character is h or g*. Regular expression:
```
-/data/seatunnel/20241002/abc[x,g].*
+/data/seatunnel/20241007/abc[hg].*
```
This example matches:
```
-/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241007/abch202410.csv
```
**Example 4**: *Match files ending with .csv in third-level folders starting with 202410*. Regular expression:
```
-/data/seatunnel/202410\d*/.*.csv
+/data/seatunnel/202410\d*/.*\.csv
```
This example matches:
```
/data/seatunnel/20241007/abch202410.csv
/data/seatunnel/20241002/abcg202410.csv
/data/seatunnel/20241005/old_data.csv
```
@@ -472,6 +472,7 @@ source {
    file_filter_pattern = "abc[DX]*.*"
  }
}
+
sink {
  Console {
  }
diff --git a/docs/en/connector-v2/source/OssFile.md b/docs/en/connector-v2/source/OssFile.md
index d5326cb86a4..01bbde41c1e 100644
--- a/docs/en/connector-v2/source/OssFile.md
+++ b/docs/en/connector-v2/source/OssFile.md
@@ -233,6 +233,55 @@ The encoding of the file to read. This param will be parsed by `Charset.forName(

Filter pattern, which is used for filtering files.

+The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression.
+Here are some examples.
+
+File Structure Example:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+Matching Rules Example:
+
+**Example 1**: *Match all .txt files*. Regular expression:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+This example matches:
+```
+/data/seatunnel/20241001/report.txt
+```
+**Example 2**: *Match all files starting with abc*. Regular expression:
+```
+/data/seatunnel/20241002/abc.*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**Example 3**: *Match all files starting with abc whose fourth character is h or g*. Regular expression:
+```
+/data/seatunnel/20241007/abc[hg].*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**Example 4**: *Match files ending with .csv in third-level folders starting with 202410*. Regular expression:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```
+
### schema [config]

Only needs to be configured when file_format_type is text, json, excel, xml or csv (or another format whose schema cannot be read from the metadata).

@@ -474,6 +523,33 @@ sink {
}
```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  OssFile {
+    path = "/seatunnel/orc"
+    bucket = "oss://tyrantlucifer-image-bed"
+    access_key = "xxxxxxxxxxxxxxxxx"
+    access_secret = "xxxxxxxxxxxxxxxxxxxxxx"
+    endpoint = "oss-cn-beijing.aliyuncs.com"
+    file_format_type = "orc"
+    // file example abcD2024.csv
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+
+sink {
+  Console {
+  }
+}
+```
+
## Changelog

### 2.2.0-beta 2022-09-26
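One subtlety in Example 3 above: inside a regex character class every listed character is literal, so a class written as `[h,g]` also matches a literal comma, while `[hg]` matches only `h` or `g`. The snippet below is a minimal demonstration of that `java.util.regex` behavior; the class name is hypothetical and nothing here is connector code.

```java
import java.util.regex.Pattern;

// Hypothetical demo (not part of SeaTunnel): character classes list their
// members directly, and every character inside [...] is taken literally.
public class CharClassDemo {
    public static void main(String[] args) {
        // [h,g] contains 'h', ',' and 'g', so a comma in fourth position matches:
        System.out.println(Pattern.matches("abc[h,g].*", "abc,202410.csv")); // true
        // [hg] admits only 'h' or 'g' as the fourth character:
        System.out.println(Pattern.matches("abc[hg].*", "abc,202410.csv"));  // false
        System.out.println(Pattern.matches("abc[hg].*", "abch202410.csv"));  // true
    }
}
```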
diff --git a/docs/en/connector-v2/source/OssJindoFile.md b/docs/en/connector-v2/source/OssJindoFile.md
index d5bd6d14fa3..97e6981aea0 100644
--- a/docs/en/connector-v2/source/OssJindoFile.md
+++ b/docs/en/connector-v2/source/OssJindoFile.md
@@ -49,30 +49,30 @@ It only supports hadoop version **2.9.X+**.

## Options

-| name | type | required | default value |
-|---------------------------|---------|----------|---------------------|
-| path | string | yes | - |
-| file_format_type | string | yes | - |
-| bucket | string | yes | - |
-| access_key | string | yes | - |
-| access_secret | string | yes | - |
-| endpoint | string | yes | - |
-| read_columns | list | no | - |
-| delimiter/field_delimiter | string | no | \001 |
-| parse_partition_from_path | boolean | no | true |
-| date_format | string | no | yyyy-MM-dd |
-| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
-| time_format | string | no | HH:mm:ss |
-| skip_header_row_number | long | no | 0 |
-| schema | config | no | - |
-| sheet_name | string | no | - |
-| xml_row_tag | string | no | - |
-| xml_use_attr_format | boolean | no | - |
-| file_filter_pattern | string | no | - |
-| compress_codec | string | no | none |
-| archive_compress_codec | string | no | none |
-| encoding | string | no | UTF-8 |
-| common-options | | no | - |
+| name | type | required | default value |
+|---------------------------|---------|----------|------------------------------------------------------------|
+| path | string | yes | - |
+| file_format_type | string | yes | - |
+| bucket | string | yes | - |
+| access_key | string | yes | - |
+| access_secret | string | yes | - |
+| endpoint | string | yes | - |
+| read_columns | list | no | - |
+| delimiter/field_delimiter | string | no | \001 |
+| parse_partition_from_path | boolean | no | true |
+| date_format | string | no | yyyy-MM-dd |
+| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
+| time_format | string | no | HH:mm:ss |
+| skip_header_row_number | long | no | 0 |
+| schema | config | no | - |
+| sheet_name | string | no | - |
+| xml_row_tag | string | no | - |
+| xml_use_attr_format | boolean | no | - |
+| file_filter_pattern | string | no | `*.txt` means read only the files ending with `.txt` |
+| compress_codec | string | no | none |
+| archive_compress_codec | string | no | none |
+| encoding | string | no | UTF-8 |
+| common-options | | no | - |

### path [string]

@@ -267,6 +267,55 @@ Reader the sheet of the workbook.

Filter pattern, which is used for filtering files.

+The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression.
+Here are some examples.
+
+File Structure Example:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+Matching Rules Example:
+
+**Example 1**: *Match all .txt files*. Regular expression:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+This example matches:
+```
+/data/seatunnel/20241001/report.txt
+```
+**Example 2**: *Match all files starting with abc*. Regular expression:
+```
+/data/seatunnel/20241002/abc.*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**Example 3**: *Match all files starting with abc whose fourth character is h or g*. Regular expression:
+```
+/data/seatunnel/20241007/abc[hg].*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**Example 4**: *Match files ending with .csv in third-level folders starting with 202410*. Regular expression:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```
+
### compress_codec [string]

The compress codec of files and the details that supported as the following shown:
@@ -364,6 +413,33 @@ sink {
}
```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  OssJindoFile {
+    bucket = "oss://tyrantlucifer-image-bed"
+    access_key = "xxxxxxxxxxxxxxxxx"
+    access_secret = "xxxxxxxxxxxxxxxxxxxxxx"
+    endpoint = "oss-cn-beijing.aliyuncs.com"
+    path = "/seatunnel/read/binary/"
+    file_format_type = "binary"
+    // file example abcD2024.csv
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+
+sink {
+  Console {
+  }
+}
+```
+
## Changelog

### next version

From 9b1479712192f14f2f02c13e56edbc8b6aa0aa8d Mon Sep 17 00:00:00 2001
From: "YOMO.LEE" <75362129@qq.com>
Date: Tue, 29 Oct 2024 17:12:07 +0800
Subject: [PATCH 16/17] [Fix][DOC] Additional explanation for the file_filter_pattern parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added file_filter_pattern descriptions for the following file connectors:
FtpFile (en), SftpFile (en), S3File (en), HdfsFile (zh)
---
 docs/en/connector-v2/source/FtpFile.md  | 81 ++++++++++++++++++++++++
 docs/en/connector-v2/source/S3File.md   | 83 ++++++++++++++++++++++++-
 docs/en/connector-v2/source/SftpFile.md | 83 ++++++++++++++++++++++++-
 docs/zh/connector-v2/source/HdfsFile.md | 77 ++++++++++++++++++++++-
 4 files changed, 321 insertions(+), 3 deletions(-)

diff --git a/docs/en/connector-v2/source/FtpFile.md b/docs/en/connector-v2/source/FtpFile.md
index ec02f77f9f7..3fe7e6b88f3 100644
--- a/docs/en/connector-v2/source/FtpFile.md
+++ b/docs/en/connector-v2/source/FtpFile.md
@@ -63,6 +63,7 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you
| archive_compress_codec | string | no | none |
| encoding | string | no | UTF-8 |
| common-options | | no | - |
+| file_filter_pattern | string | no | |

### host [string]

The target ftp host is required
@@ -84,6 +85,59 @@ The target ftp password is required

The source file path.

+### file_filter_pattern [string]
+
+Filter pattern, which is used for filtering files.
+
+The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression.
+Here are some examples.
+
+File Structure Example:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+Matching Rules Example:
+
+**Example 1**: *Match all .txt files*. Regular expression:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+This example matches:
+```
+/data/seatunnel/20241001/report.txt
+```
+**Example 2**: *Match all files starting with abc*. Regular expression:
+```
+/data/seatunnel/20241002/abc.*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**Example 3**: *Match all files starting with abc whose fourth character is h or g*. Regular expression:
+```
+/data/seatunnel/20241007/abc[hg].*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**Example 4**: *Match files ending with .csv in third-level folders starting with 202410*. Regular expression:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```
+
### file_format_type [string]

File type, supported as the following file types:
@@ -400,6 +454,33 @@ sink {
}
```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  FtpFile {
+    host = "192.168.31.48"
+    port = 21
+    user = tyrantlucifer
+    password = tianchao
+    path = "/seatunnel/read/binary/"
+    file_format_type = "binary"
+    // file example abcD2024.csv
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+
+sink {
+  Console {
+  }
+}
+```
+
## Changelog

### 2.2.0-beta 2022-09-26

diff --git a/docs/en/connector-v2/source/S3File.md b/docs/en/connector-v2/source/S3File.md
index d280d6dc7f2..2e23fc6a3a7 100644
--- a/docs/en/connector-v2/source/S3File.md
+++ b/docs/en/connector-v2/source/S3File.md
@@ -196,7 +196,7 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto

## Options

-| name | type | required | default value | Description |
+| name | type | required | default value | Description |
|---------------------------------|---------|----------|-------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| path | string | yes | - | The s3 path that needs to be read can have sub paths, but the sub paths need to meet certain format requirements. Specific requirements can be referred to "parse_partition_from_path" option |
| file_format_type | string | yes | - | File type, supported as the following file types: `text` `csv` `parquet` `orc` `json` `excel` `xml` `binary` |
@@ -221,11 +221,65 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto
| compress_codec | string | no | none | |
| archive_compress_codec | string | no | none | |
| encoding | string | no | UTF-8 | |
| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. |
+| file_filter_pattern | string | no | | `*.txt` means read only the files ending with `.txt` |

### delimiter/field_delimiter [string]

**delimiter** parameter will deprecate after version 2.3.5, please use **field_delimiter** instead.

+### file_filter_pattern [string]
+
+Filter pattern, which is used for filtering files.
+
+The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression.
+Here are some examples.
+
+File Structure Example:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+Matching Rules Example:
+
+**Example 1**: *Match all .txt files*. Regular expression:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+This example matches:
+```
+/data/seatunnel/20241001/report.txt
+```
+**Example 2**: *Match all files starting with abc*. Regular expression:
+```
+/data/seatunnel/20241002/abc.*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**Example 3**: *Match all files starting with abc whose fourth character is h or g*. Regular expression:
+```
+/data/seatunnel/20241007/abc[hg].*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**Example 4**: *Match files ending with .csv in third-level folders starting with 202410*. Regular expression:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```
+
### compress_codec [string]

The compress codec of files and the details that supported as the following shown:
@@ -349,6 +403,33 @@ sink {
}
```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  S3File {
+    path = "/seatunnel/json"
+    bucket = "s3a://seatunnel-test"
+    fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn"
+    fs.s3a.aws.credentials.provider="com.amazonaws.auth.InstanceProfileCredentialsProvider"
+    file_format_type = "json"
+    read_columns = ["id", "name"]
+    // file example abcD2024.csv
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+
+sink {
+  Console {
+  }
+}
+```
+
## Changelog

### 2.3.0-beta 2022-10-20
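A caution on the `*.txt` shorthand that the option tables above use to describe `file_filter_pattern`: that is glob syntax, while the prose sections state the option follows standard regular expressions. If the pattern really is compiled as a standard regex — an assumption, not something these patches confirm — a bare `*.txt` does not even compile, and the regex spelling of that glob is `.*\.txt`. A small sketch with a hypothetical class name:

```java
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

// Hypothetical demo (not part of SeaTunnel): glob vs. regex spelling.
public class GlobVsRegex {
    public static void main(String[] args) {
        try {
            Pattern.compile("*.txt"); // glob spelling; '*' has nothing to repeat
        } catch (PatternSyntaxException e) {
            System.out.println("Not a valid regex: " + e.getMessage());
        }
        // The regex equivalent of the glob *.txt:
        System.out.println(Pattern.matches(".*\\.txt", "report.txt")); // true
        System.out.println(Pattern.matches(".*\\.txt", "logo.png"));   // false
    }
}
```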
diff --git a/docs/en/connector-v2/source/SftpFile.md b/docs/en/connector-v2/source/SftpFile.md
index 3eadcd3a69e..ff9964cefef 100644
--- a/docs/en/connector-v2/source/SftpFile.md
+++ b/docs/en/connector-v2/source/SftpFile.md
@@ -71,7 +71,7 @@ The File does not have a specific type list, and we can indicate which SeaTunnel

## Source Options

-| Name | Type | Required | default value | Description |
+| Name | Type | Required | default value | Description |
|---------------------------|---------|----------|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| host | String | Yes | - | The target sftp host is required |
| port | Int | Yes | - | The target sftp port is required |
@@ -95,6 +95,60 @@ The File does not have a specific type list, and we can indicate which SeaTunnel
| archive_compress_codec | string | no | none |
| encoding | string | no | UTF-8 |
| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. |
+| file_filter_pattern | string | no | | `*.txt` means read only the files ending with `.txt` |
+
+### file_filter_pattern [string]
+
+Filter pattern, which is used for filtering files.
+
+The pattern follows standard regular expressions. For details, please refer to https://en.wikipedia.org/wiki/Regular_expression.
+Here are some examples.
+
+File Structure Example:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+Matching Rules Example:
+
+**Example 1**: *Match all .txt files*. Regular expression:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+This example matches:
+```
+/data/seatunnel/20241001/report.txt
+```
+**Example 2**: *Match all files starting with abc*. Regular expression:
+```
+/data/seatunnel/20241002/abc.*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**Example 3**: *Match all files starting with abc whose fourth character is h or g*. Regular expression:
+```
+/data/seatunnel/20241007/abc[hg].*
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**Example 4**: *Match files ending with .csv in third-level folders starting with 202410*. Regular expression:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+This example matches:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```

### file_format_type [string]

@@ -305,3 +359,30 @@ SftpFile {
}

```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  SftpFile {
+    host = "sftp"
+    port = 22
+    user = seatunnel
+    password = pass
+    path = "tmp/seatunnel/read/json"
+    file_format_type = "json"
+    result_table_name = "sftp"
+    // file example abcD2024.csv
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+
+sink {
+  Console {
+  }
+}
+```
\ No newline at end of file
diff --git a/docs/zh/connector-v2/source/HdfsFile.md b/docs/zh/connector-v2/source/HdfsFile.md
index 0f983a80bcf..24ca67bfca4 100644
--- a/docs/zh/connector-v2/source/HdfsFile.md
+++ b/docs/zh/connector-v2/source/HdfsFile.md
@@ -39,7 +39,7 @@

## 源选项

-| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
|---------------------------|---------|------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| path | string | 是 | - | 源文件路径。 |
| file_format_type | string | 是 | - | 我们支持以下文件类型:`text` `json` `csv` `orc` `parquet` `excel`。请注意,最终文件名将以文件格式的后缀结束,文本文件的后缀是 `txt`。 |
@@ -55,6 +55,7 @@
| kerberos_principal | string | 否 | - | kerberos 的 principal。 |
| kerberos_keytab_path | string | 否 | - | kerberos 的 keytab 路径。 |
| skip_header_row_number | long | 否 | 0 | 跳过前几行,但仅适用于 txt 和 csv。例如,设置如下:`skip_header_row_number = 2`。然后 Seatunnel 将跳过源文件中的前两行。 |
+| file_filter_pattern | string | 否 | - | `*.txt` 表示只读取以 .txt 为后缀名的文件。 |
| schema | config | 否 | - | 上游数据的模式字段。 |
| sheet_name | string | 否 | - | 读取工作簿的表格,仅在文件格式为 excel 时使用。 |
| compress_codec | string | 否 | none | 文件的压缩编解码器。 |
@@ -64,6 +65,58 @@

**delimiter** 参数在版本 2.3.5 后将被弃用,请改用 **field_delimiter**。

+### file_filter_pattern [string]
+
+这个过滤规则遵循标准正则表达式。关于详情,请参考 https://en.wikipedia.org/wiki/Regular_expression
+
+这里是一些例子。
+
+文件清单:
+```
+/data/seatunnel/20241001/report.txt
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+/data/seatunnel/20241012/logo.png
+```
+匹配规则:
+
+**例子 1**: *匹配所有以 .txt 为后缀名的文件*,匹配正则为:
+```
+/data/seatunnel/20241001/.*\.txt
+```
+匹配的结果是:
+```
+/data/seatunnel/20241001/report.txt
+```
+**例子 2**: *匹配所有文件名以 abc 开头的文件*,匹配正则为:
+```
+/data/seatunnel/20241002/abc.*
+```
+匹配的结果是:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+```
+**例子 3**: *匹配所有文件名以 abc 开头、且第四个字母是 h 或 g 的文件*,匹配正则为:
+```
+/data/seatunnel/20241007/abc[hg].*
+```
+匹配的结果是:
+```
+/data/seatunnel/20241007/abch202410.csv
+```
+**例子 4**: *匹配第三级目录以 202410 开头且文件后缀名是 .csv 的文件*,匹配正则为:
+```
+/data/seatunnel/202410\d*/.*\.csv
+```
+匹配的结果是:
+```
+/data/seatunnel/20241007/abch202410.csv
+/data/seatunnel/20241002/abcg202410.csv
+/data/seatunnel/20241005/old_data.csv
+```
+
### compress_codec [string]

文件的压缩编解码器及支持的详细信息如下所示:
@@ -125,3 +178,25 @@ sink {
}
```

+### Filter File
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "BATCH"
+}
+
+source {
+  HdfsFile {
+    path = "/apps/hive/demo/student"
+    file_format_type = "json"
+    fs.defaultFS = "hdfs://namenode001"
+    file_filter_pattern = "abc[DX]*.*"
+  }
+}
+
+sink {
+  Console {
+  }
+}
+```
\ No newline at end of file

From 9dc6fe768c3e2726d1029c9368585141b72092d9 Mon Sep 17 00:00:00 2001
From: Jia Fan
Date: Tue, 29 Oct 2024 18:09:34 +0800
Subject: [PATCH 17/17] update

---
 docs/en/connector-v2/source/CosFile.md      | 48 ++++++++++-----------
 docs/en/connector-v2/source/FtpFile.md      |  1 -
 docs/en/connector-v2/source/HdfsFile.md     |  4 +-
 docs/en/connector-v2/source/LocalFile.md    | 42 +++++++++---------
 docs/en/connector-v2/source/OssFile.md      |  4 +-
 docs/en/connector-v2/source/OssJindoFile.md | 48 ++++++++++-----------
 docs/en/connector-v2/source/S3File.md       |  4 +-
 docs/en/connector-v2/source/SftpFile.md     |  1 -
 docs/zh/connector-v2/source/HdfsFile.md     |  6 ++-
 9 files changed, 79 insertions(+), 76 deletions(-)

diff --git a/docs/en/connector-v2/source/CosFile.md b/docs/en/connector-v2/source/CosFile.md
index 4b0675b9893..15b6de0c6f8 100644
--- a/docs/en/connector-v2/source/CosFile.md
+++ b/docs/en/connector-v2/source/CosFile.md
@@ -45,30 +45,30 @@ To use this connector you need put hadoop-cos-{hadoop.version}-{version}.jar and

## Options

-| name | type | required | default value |
-|---------------------------|---------|----------|------------------------------------------------------------|
-| path | string | yes | - |
-| file_format_type | string | yes | - |
-| bucket | string | yes | - |
-| secret_id | string | yes | - |
-| secret_key | string | yes | - |
-| region | string | yes | - |
-| read_columns | list | yes | - |
-| delimiter/field_delimiter | string | no | \001 |
-| parse_partition_from_path | boolean | no | true |
-| skip_header_row_number | long | no | 0 |
-| date_format | string | no | yyyy-MM-dd |
-| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
-| time_format | string | no | HH:mm:ss |
-| schema | config | no | - |
-| sheet_name | string | no | - |
-| xml_row_tag | string | no | - |
-| xml_use_attr_format | boolean | no | - |
-| file_filter_pattern | string | no | `*.txt` means read only the files ending with `.txt` |
-| compress_codec | string | no | none |
-| archive_compress_codec | string | no | none |
-| encoding | string | no | UTF-8 |
-| common-options | | no | - |
+| name | type | required | default value |
+|---------------------------|---------|----------|---------------------|
+| path | string | yes | - |
+| file_format_type | string | yes | - |
+| bucket | string | yes | - |
+| secret_id | string | yes | - |
+| secret_key | string | yes | - |
+| region | string | yes | - |
+| read_columns | list | yes | - |
+| delimiter/field_delimiter | string | no | \001 |
+| parse_partition_from_path | boolean | no | true |
+| skip_header_row_number | long | no | 0 |
+| date_format | string | no | yyyy-MM-dd |
+| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
+| time_format | string | no | HH:mm:ss |
+| schema | config | no | - |
+| sheet_name | string | no | - |
+| xml_row_tag | string | no | - |
+| xml_use_attr_format | boolean | no | - |
+| file_filter_pattern | string | no | |
+| compress_codec | string | no | none |
+| archive_compress_codec | string | no | none |
+| encoding | string | no | UTF-8 |
+| common-options | | no | - |

### path [string]

diff --git a/docs/en/connector-v2/source/FtpFile.md b/docs/en/connector-v2/source/FtpFile.md
index 3fe7e6b88f3..6d114813769 100644
--- a/docs/en/connector-v2/source/FtpFile.md
+++ b/docs/en/connector-v2/source/FtpFile.md
@@ -63,7 +63,6 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you
| archive_compress_codec | string | no | none |
| encoding | string | no | UTF-8 |
| common-options | | no | - |
-| file_filter_pattern | string | no | |

### host [string]

diff --git a/docs/en/connector-v2/source/HdfsFile.md b/docs/en/connector-v2/source/HdfsFile.md
index 300d96f0f16..405dfff820f 100644
--- a/docs/en/connector-v2/source/HdfsFile.md
+++ b/docs/en/connector-v2/source/HdfsFile.md
@@ -41,7 +41,7 @@ Read data from hdfs file system.

## Source Options

-| Name | Type | Required | Default | Description |
+| Name | Type | Required | Default | Description |
|---------------------------|---------|----------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| path | string | yes | - | The source file path. |
| file_format_type | string | yes | - | We supported as the following file types:`text` `csv` `parquet` `orc` `json` `excel` `xml` `binary`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. |
@@ -62,7 +62,7 @@ Read data from hdfs file system.
| sheet_name | string | no | - | Reader the sheet of the workbook,Only used when file_format is excel. |
| xml_row_tag | string | no | - | Specifies the tag name of the data rows within the XML file, only used when file_format is xml. |
| xml_use_attr_format | boolean | no | - | Specifies whether to process data using the tag attribute format, only used when file_format is xml. |
-| file_filter_pattern | string | no | | `*.txt` means read only the files ending with `.txt` |
+| file_filter_pattern | string | no | | Filter pattern, which is used for filtering files. |
| compress_codec | string | no | none | The compress codec of files |
| archive_compress_codec | string | no | none |
| encoding | string | no | UTF-8 | |

diff --git a/docs/en/connector-v2/source/LocalFile.md b/docs/en/connector-v2/source/LocalFile.md
index fb0e8de9139..65f287f057b 100644
--- a/docs/en/connector-v2/source/LocalFile.md
+++ b/docs/en/connector-v2/source/LocalFile.md
@@ -43,27 +43,27 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you

## Options

-| name | type | required | default value |
-|---------------------------|---------|----------|------------------------------------------------------------|
-| path | string | yes | - |
-| file_format_type | string | yes | - |
-| read_columns | list | no | - |
-| delimiter/field_delimiter | string | no | \001 |
-| parse_partition_from_path | boolean | no | true |
-| date_format | string | no | yyyy-MM-dd |
-| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
-| time_format | string | no | HH:mm:ss |
-| skip_header_row_number | long | no | 0 |
-| schema | config | no | - |
-| sheet_name | string | no | - |
-| xml_row_tag | string | no | - |
-| xml_use_attr_format | boolean | no | - |
-| file_filter_pattern | string | no | `*.txt` means read only the files ending with `.txt` |
-| compress_codec | string | no | none |
-| archive_compress_codec | string | no | none |
-| encoding | string | no | UTF-8 |
-| common-options | | no | - |
-| tables_configs | list | no | used to define a multiple table task |
+| name | type | required | default value |
+|---------------------------|---------|----------|--------------------------------------|
+| path | string | yes | - |
+| file_format_type | string | yes | - |
+| read_columns | list | no | - |
+| delimiter/field_delimiter | string | no | \001 |
+| parse_partition_from_path | boolean | no | true |
+| date_format | string | no | yyyy-MM-dd |
+| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
+| time_format | string | no | HH:mm:ss |
+| skip_header_row_number | long | no | 0 |
+| schema | config | no | - |
+| sheet_name | string | no | - |
+| xml_row_tag | string | no | - |
+| xml_use_attr_format | boolean | no | - |
+| file_filter_pattern | string | no | |
+| compress_codec | string | no | none |
+| archive_compress_codec | string | no | none |
+| encoding | string | no | UTF-8 |
+| common-options | | no | - |
+| tables_configs | list | no | used to define a multiple table task |

### path [string]

diff --git a/docs/en/connector-v2/source/OssFile.md b/docs/en/connector-v2/source/OssFile.md
index 01bbde41c1e..36d998f054c 100644
--- a/docs/en/connector-v2/source/OssFile.md
+++ b/docs/en/connector-v2/source/OssFile.md
@@ -190,7 +190,7 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto

## Options

-| name | type | required | default value | Description |
+| name | type | required | default value | Description |
|---------------------------|---------|----------|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| path | string | yes | - | The Oss path that needs to be read can have sub paths, but the sub paths need to meet certain format requirements. Specific requirements can be referred to "parse_partition_from_path" option |
| file_format_type | string | yes | - | File type, supported as the following file types: `text` `csv` `parquet` `orc` `json` `excel` `xml` `binary` |
@@ -211,7 +211,7 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto
| xml_use_attr_format | boolean | no | - | Specifies whether to process data using the tag attribute format, only used when file_format is xml. |
| compress_codec | string | no | none | Which compress codec the files used. |
| encoding | string | no | UTF-8 |
-| file_filter_pattern | string | no | | `*.txt` means read only the files ending with `.txt` |
+| file_filter_pattern | string | no | | Filter pattern, which is used for filtering files. |
| common-options | config | no | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. |

### compress_codec [string]

diff --git a/docs/en/connector-v2/source/OssJindoFile.md b/docs/en/connector-v2/source/OssJindoFile.md
index 97e6981aea0..933439edc9f 100644
--- a/docs/en/connector-v2/source/OssJindoFile.md
+++ b/docs/en/connector-v2/source/OssJindoFile.md
@@ -49,30 +49,30 @@ It only supports hadoop version **2.9.X+**.

## Options

-| name | type | required | default value |
-|---------------------------|---------|----------|------------------------------------------------------------|
-| path | string | yes | - |
-| file_format_type | string | yes | - |
-| bucket | string | yes | - |
-| access_key | string | yes | - |
-| access_secret | string | yes | - |
-| endpoint | string | yes | - |
-| read_columns | list | no | - |
-| delimiter/field_delimiter | string | no | \001 |
-| parse_partition_from_path | boolean | no | true |
-| date_format | string | no | yyyy-MM-dd |
-| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
-| time_format | string | no | HH:mm:ss |
-| skip_header_row_number | long | no | 0 |
-| schema | config | no | - |
-| sheet_name | string | no | - |
-| xml_row_tag | string | no | - |
-| xml_use_attr_format | boolean | no | - |
-| file_filter_pattern | string | no | `*.txt` means read only the files ending with `.txt` |
-| compress_codec | string | no | none |
-| archive_compress_codec | string | no | none |
-| encoding | string | no | UTF-8 |
-| common-options | | no | - |
+| name | type | required | default value |
+|---------------------------|---------|----------|---------------------|
+| path | string | yes | - |
+| file_format_type | string | yes | - |
+| bucket | string | yes | - |
+| access_key | string | yes | - |
+| access_secret | string | yes | - |
+| endpoint | string | yes | - |
+| read_columns | list | no | - |
+| delimiter/field_delimiter | string | no | \001 |
+| parse_partition_from_path | boolean | no | true |
+| date_format | string | no | yyyy-MM-dd |
+| datetime_format | string | no | yyyy-MM-dd HH:mm:ss |
+| time_format | string | no | HH:mm:ss |
+| skip_header_row_number | long | no | 0 |
+| schema | config | no | - |
+| sheet_name | string | no | - |
+| xml_row_tag | string | no | - |
+| xml_use_attr_format | boolean | no | - |
+| file_filter_pattern | string | no | |
+| compress_codec | string | no | none |
+| archive_compress_codec | string | no | none |
+| encoding | string | no | UTF-8 |
+| common-options | | no | - |

### path [string]

diff --git a/docs/en/connector-v2/source/S3File.md b/docs/en/connector-v2/source/S3File.md
index 2e23fc6a3a7..4834b025bc3 100644
--- a/docs/en/connector-v2/source/S3File.md
+++ b/docs/en/connector-v2/source/S3File.md
@@ -196,7 +196,7 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto

## Options

-| name | type | required | default value | Description |
+| name | type | required | default value | Description |
|---------------------------------|---------|----------|-------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| path | string | yes | - | The s3 path that needs to be read can have sub paths, but the sub paths need to meet certain format requirements. Specific requirements can be referred to "parse_partition_from_path" option |
| file_format_type | string | yes | - | File type, supported as the following file types: `text` `csv` `parquet` `orc` `json` `excel` `xml` `binary` |
@@ -220,8 +220,8 @@ If you assign file type to `parquet` `orc`, schema option not required, connecto
| compress_codec | string | no | none | |
| archive_compress_codec | string | no | none | |
| encoding | string | no | UTF-8 | |
+| file_filter_pattern | string | no | | Filter pattern, which is used for filtering files. |
| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. |
-| file_filter_pattern | string | no | | `*.txt` means read only the files ending with `.txt` |

### delimiter/field_delimiter [string]

diff --git a/docs/en/connector-v2/source/SftpFile.md b/docs/en/connector-v2/source/SftpFile.md
index ff9964cefef..95c710110a0 100644
--- a/docs/en/connector-v2/source/SftpFile.md
+++ b/docs/en/connector-v2/source/SftpFile.md
@@ -95,7 +95,6 @@ The File does not have a specific type list, and we can indicate which SeaTunnel
| archive_compress_codec | string | no | none |
| encoding | string | no | UTF-8 |
| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. |
-| file_filter_pattern | string | no | | `*.txt` means read only the files ending with `.txt` |

### file_filter_pattern [string]

Filter pattern, which is used for filtering files.

diff --git a/docs/zh/connector-v2/source/HdfsFile.md b/docs/zh/connector-v2/source/HdfsFile.md
index 24ca67bfca4..9cd254ef808 100644
--- a/docs/zh/connector-v2/source/HdfsFile.md
+++ b/docs/zh/connector-v2/source/HdfsFile.md
@@ -39,7 +39,7 @@

## 源选项

-| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
+| 名称 | 类型 | 是否必须 | 默认值 | 描述 |
|---------------------------|---------|------|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| path | string | 是 | - | 源文件路径。 |
| file_format_type | string | 是 | - | 我们支持以下文件类型:`text` `json` `csv` `orc` `parquet` `excel`。请注意,最终文件名将以文件格式的后缀结束,文本文件的后缀是 `txt`。 |
@@ -55,7 +55,7 @@
| kerberos_principal | string | 否 | - | kerberos 的 principal。 |
| kerberos_keytab_path | string | 否 | - | kerberos 的 keytab 路径。 |
| skip_header_row_number | long | 否 | 0 | 跳过前几行,但仅适用于 txt 和 csv。例如,设置如下:`skip_header_row_number = 2`。然后 Seatunnel 将跳过源文件中的前两行。 |
-| file_filter_pattern | string | 否 | - | `*.txt` 表示只读取以 .txt 为后缀名的文件。 |
+| file_filter_pattern | string | 否 | - | 过滤模式,用于过滤文件。 |
| schema | config | 否 | - | 上游数据的模式字段。 |
| sheet_name | string | 否 | - | 读取工作簿的表格,仅在文件格式为 excel 时使用。 |
| compress_codec | string | 否 | none | 文件的压缩编解码器。 |
@@ -67,6 +67,8 @@

### file_filter_pattern [string]

+过滤模式,用于过滤文件。
+
这个过滤规则遵循标准正则表达式。关于详情,请参考 https://en.wikipedia.org/wiki/Regular_expression

这里是一些例子。