From 7517d106ad4ce434bdffc8522d56105d5a22fc21 Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 15 May 2024 16:22:52 +0800 Subject: [PATCH] [Feature][S3 File] Make S3 File Connector support multiple table write (#6698) --- docs/en/connector-v2/sink/S3File.md | 2 +- .../connector-file/connector-file-s3/pom.xml | 2 +- .../file/s3/catalog/S3FileCatalogFactory.java | 4 +- .../config/{S3Conf.java => S3HadoopConf.java} | 39 ++-- .../seatunnel/file/s3/sink/S3FileSink.java | 4 +- .../file/s3/source/S3FileSource.java | 106 +---------- .../file/s3/source/S3FileSourceFactory.java | 10 ++ .../MultipleTableS3FileSourceConfig.java | 34 ++++ .../s3/source/config/S3FileSourceConfig.java | 46 +++++ .../seatunnel/hive/config/HiveOnS3Conf.java | 6 +- .../seatunnel/hive/storage/S3StorageTest.java | 8 +- .../redshift/sink/S3RedshiftSink.java | 5 +- seatunnel-dist/pom.xml | 2 +- .../connector-file-s3-e2e/pom.xml | 77 ++++++++ .../e2e/connector/file/s3/S3FileIT.java | 166 ++++++++++++++++++ .../file/s3/S3FileWithMultipleTableIT.java | 148 ++++++++++++++++ .../e2e/connector/file/s3/S3Utils.java | 98 +++++++++++ .../src/test/resources/excel/e2e.xlsx | Bin 0 -> 5823 bytes .../resources/excel/fake_to_s3_excel.conf | 81 +++++++++ .../excel/s3_excel_projection_to_assert.conf | 109 ++++++++++++ .../resources/excel/s3_excel_to_assert.conf | 135 ++++++++++++++ ...s3_excel_to_assert_with_multipletable.conf | 134 ++++++++++++++ .../excel/s3_filter_excel_to_assert.conf | 136 ++++++++++++++ .../src/test/resources/json/e2e.json | 5 + .../src/test/resources/json/e2e.json.lzo | Bin 0 -> 3466 bytes .../resources/json/fake_to_s3_file_json.conf | 86 +++++++++ .../json/s3_file_json_lzo_to_console.conf | 144 +++++++++++++++ .../json/s3_file_json_to_assert.conf | 133 ++++++++++++++ ...ile_json_to_assert_with_multipletable.conf | 130 ++++++++++++++ .../resources/json/s3_file_to_console.conf | 46 +++++ .../src/test/resources/orc/e2e.orc | Bin 0 -> 5730 bytes .../resources/orc/fake_to_s3_file_orc.conf | 87 
+++++++++ .../orc/s3_file_orc_projection_to_assert.conf | 82 +++++++++ .../resources/orc/s3_file_orc_to_assert.conf | 81 +++++++++ ...file_orc_to_assert_with_multipletable.conf | 66 +++++++ .../src/test/resources/parquet/e2e.parquet | Bin 0 -> 9730 bytes .../parquet/fake_to_s3_file_parquet.conf | 87 +++++++++ .../s3_file_parquet_projection_to_assert.conf | 82 +++++++++ .../parquet/s3_file_parquet_to_assert.conf | 99 +++++++++++ ..._parquet_to_assert_with_multipletable.conf | 66 +++++++ .../resources/parquet/s3_file_to_console.conf | 42 +++++ .../src/test/resources/text/e2e.txt | 5 + .../src/test/resources/text/e2e.txt.lzo | Bin 0 -> 2720 bytes .../src/test/resources/text/e2e_delimiter.txt | 5 + .../test/resources/text/e2e_time_format.txt | 5 + .../resources/text/fake_to_s3_file_text.conf | 87 +++++++++ .../fake_to_s3_file_with_multiple_table.conf | 128 ++++++++++++++ .../text/s3_file_delimiter_assert.conf | 109 ++++++++++++ .../text/s3_file_text_lzo_to_assert.conf | 143 +++++++++++++++ .../s3_file_text_projection_to_assert.conf | 134 ++++++++++++++ .../text/s3_file_text_skip_headers.conf | 134 ++++++++++++++ .../text/s3_file_text_to_assert.conf | 133 ++++++++++++++ ...ile_text_to_assert_with_multipletable.conf | 130 ++++++++++++++ .../text/s3_file_time_format_assert.conf | 100 +++++++++++ .../seatunnel-connector-v2-e2e/pom.xml | 1 + .../seatunnel-hadoop3-3.1.4-uber/pom.xml | 1 + 56 files changed, 3566 insertions(+), 137 deletions(-) rename seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/config/{S3Conf.java => S3HadoopConf.java} (67%) create mode 100644 seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/config/MultipleTableS3FileSourceConfig.java create mode 100644 
seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/config/S3FileSourceConfig.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/pom.xml create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3FileIT.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3FileWithMultipleTableIT.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3Utils.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/e2e.xlsx create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/fake_to_s3_excel.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_projection_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_to_assert_with_multipletable.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_filter_excel_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/e2e.json create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/e2e.json.lzo create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/fake_to_s3_file_json.conf create mode 100644 
seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_lzo_to_console.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_to_assert_with_multipletable.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_to_console.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/orc/e2e.orc create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/orc/fake_to_s3_file_orc.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/orc/s3_file_orc_projection_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/orc/s3_file_orc_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/orc/s3_file_orc_to_assert_with_multipletable.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/e2e.parquet create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/fake_to_s3_file_parquet.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_projection_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_to_assert_with_multipletable.conf create mode 100644 
seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_to_console.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e.txt create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e.txt.lzo create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e_delimiter.txt create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e_time_format.txt create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/fake_to_s3_file_text.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/fake_to_s3_file_with_multiple_table.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_delimiter_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_lzo_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_projection_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_skip_headers.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_to_assert.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_to_assert_with_multipletable.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_time_format_assert.conf diff --git a/docs/en/connector-v2/sink/S3File.md b/docs/en/connector-v2/sink/S3File.md index 322263b1ede0..508524c51157 100644 --- 
a/docs/en/connector-v2/sink/S3File.md +++ b/docs/en/connector-v2/sink/S3File.md @@ -474,7 +474,7 @@ transform { sink { S3File { bucket = "s3a://seatunnel-test" - tmp_path = "/tmp/seatunnel" + tmp_path = "/tmp/seatunnel/${table_name}" path="/test/${table_name}" fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" fs.s3a.aws.credentials.provider="org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/pom.xml b/seatunnel-connectors-v2/connector-file/connector-file-s3/pom.xml index 261f5fdbb957..fbf0016fcedd 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/pom.xml +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/pom.xml @@ -30,7 +30,7 @@ SeaTunnel : Connectors V2 : File : S3 - 2.6.5 + 3.1.4 27.0-jre diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/catalog/S3FileCatalogFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/catalog/S3FileCatalogFactory.java index add4b0ac208d..53f350e10ca3 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/catalog/S3FileCatalogFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/catalog/S3FileCatalogFactory.java @@ -24,7 +24,7 @@ import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf; import org.apache.seatunnel.connectors.seatunnel.file.hadoop.HadoopFileSystemProxy; -import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3Conf; +import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3HadoopConf; import com.google.auto.service.AutoService; @@ -32,7 +32,7 @@ public class S3FileCatalogFactory implements CatalogFactory { 
@Override public Catalog createCatalog(String catalogName, ReadonlyConfig options) { - HadoopConf hadoopConf = S3Conf.buildWithReadOnlyConfig(options); + HadoopConf hadoopConf = S3HadoopConf.buildWithReadOnlyConfig(options); HadoopFileSystemProxy fileSystemUtils = new HadoopFileSystemProxy(hadoopConf); return new S3FileCatalog(fileSystemUtils, options); } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/config/S3Conf.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/config/S3HadoopConf.java similarity index 67% rename from seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/config/S3Conf.java rename to seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/config/S3HadoopConf.java index 2680ce151c7d..557403b28aca 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/config/S3Conf.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/config/S3HadoopConf.java @@ -17,16 +17,13 @@ package org.apache.seatunnel.connectors.seatunnel.file.s3.config; -import org.apache.seatunnel.shade.com.typesafe.config.Config; - import org.apache.seatunnel.api.configuration.ReadonlyConfig; -import org.apache.seatunnel.common.config.CheckConfigUtil; import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf; import java.util.HashMap; import java.util.Map; -public class S3Conf extends HadoopConf { +public class S3HadoopConf extends HadoopConf { private static final String HDFS_S3N_IMPL = "org.apache.hadoop.fs.s3native.NativeS3FileSystem"; private static final String HDFS_S3A_IMPL = "org.apache.hadoop.fs.s3a.S3AFileSystem"; 
protected static final String S3A_SCHEMA = "s3a"; @@ -47,39 +44,33 @@ public void setSchema(String schema) { this.schema = schema; } - protected S3Conf(String hdfsNameKey) { + public S3HadoopConf(String hdfsNameKey) { super(hdfsNameKey); } - public static HadoopConf buildWithConfig(Config config) { + public static HadoopConf buildWithReadOnlyConfig(ReadonlyConfig config) { - String bucketName = config.getString(S3ConfigOptions.S3_BUCKET.key()); - S3Conf hadoopConf = new S3Conf(bucketName); + String bucketName = config.get(S3ConfigOptions.S3_BUCKET); + S3HadoopConf hadoopConf = new S3HadoopConf(bucketName); if (bucketName.startsWith(S3A_SCHEMA)) { hadoopConf.setSchema(S3A_SCHEMA); } HashMap s3Options = new HashMap<>(); hadoopConf.putS3SK(s3Options, config); - if (CheckConfigUtil.isValidParam(config, S3ConfigOptions.S3_PROPERTIES.key())) { - config.getObject(S3ConfigOptions.S3_PROPERTIES.key()) - .forEach((key, value) -> s3Options.put(key, String.valueOf(value.unwrapped()))); + if (config.getOptional(S3ConfigOptions.S3_PROPERTIES).isPresent()) { + config.get(S3ConfigOptions.S3_PROPERTIES) + .forEach((key, value) -> s3Options.put(key, String.valueOf(value))); } s3Options.put( S3ConfigOptions.S3A_AWS_CREDENTIALS_PROVIDER.key(), - config.getString(S3ConfigOptions.S3A_AWS_CREDENTIALS_PROVIDER.key())); + config.get(S3ConfigOptions.S3A_AWS_CREDENTIALS_PROVIDER).getProvider()); s3Options.put( - S3ConfigOptions.FS_S3A_ENDPOINT.key(), - config.getString(S3ConfigOptions.FS_S3A_ENDPOINT.key())); + S3ConfigOptions.FS_S3A_ENDPOINT.key(), config.get(S3ConfigOptions.FS_S3A_ENDPOINT)); hadoopConf.setExtraOptions(s3Options); return hadoopConf; } - public static HadoopConf buildWithReadOnlyConfig(ReadonlyConfig readonlyConfig) { - Config config = readonlyConfig.toConfig(); - return buildWithConfig(config); - } - protected String switchHdfsImpl() { switch (this.schema) { case S3A_SCHEMA: @@ -89,13 +80,13 @@ protected String switchHdfsImpl() { } } - private void putS3SK(Map s3Options, 
Config config) { - if (!CheckConfigUtil.isValidParam(config, S3ConfigOptions.S3_ACCESS_KEY.key()) - && !CheckConfigUtil.isValidParam(config, S3ConfigOptions.S3_SECRET_KEY.key())) { + private void putS3SK(Map s3Options, ReadonlyConfig config) { + if (!config.getOptional(S3ConfigOptions.S3_ACCESS_KEY).isPresent() + && !config.getOptional(S3ConfigOptions.S3_SECRET_KEY).isPresent()) { return; } - String accessKey = config.getString(S3ConfigOptions.S3_ACCESS_KEY.key()); - String secretKey = config.getString(S3ConfigOptions.S3_SECRET_KEY.key()); + String accessKey = config.get(S3ConfigOptions.S3_ACCESS_KEY); + String secretKey = config.get(S3ConfigOptions.S3_SECRET_KEY); if (S3A_SCHEMA.equals(this.schema)) { s3Options.put("fs.s3a.access.key", accessKey); s3Options.put("fs.s3a.secret.key", secretKey); diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSink.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSink.java index b75835695235..2a636bcbcc97 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSink.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSink.java @@ -34,8 +34,8 @@ import org.apache.seatunnel.common.constants.PluginType; import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException; -import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3Conf; import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3ConfigOptions; +import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3HadoopConf; import org.apache.seatunnel.connectors.seatunnel.file.sink.BaseMultipleTableFileSink; 
import java.util.Optional; @@ -55,7 +55,7 @@ public String getPluginName() { } public S3FileSink(CatalogTable catalogTable, ReadonlyConfig readonlyConfig) { - super(S3Conf.buildWithConfig(readonlyConfig.toConfig()), readonlyConfig, catalogTable); + super(S3HadoopConf.buildWithReadOnlyConfig(readonlyConfig), readonlyConfig, catalogTable); this.catalogTable = catalogTable; this.readonlyConfig = readonlyConfig; Config pluginConfig = readonlyConfig.toConfig(); diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSource.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSource.java index f36e935818cb..a2918062ecb2 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSource.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSource.java @@ -17,111 +17,19 @@ package org.apache.seatunnel.connectors.seatunnel.file.s3.source; -import org.apache.seatunnel.shade.com.typesafe.config.Config; - -import org.apache.seatunnel.api.common.PrepareFailException; -import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode; -import org.apache.seatunnel.api.source.SeaTunnelSource; -import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; -import org.apache.seatunnel.api.table.catalog.schema.TableSchemaOptions; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.common.config.CheckConfigUtil; -import org.apache.seatunnel.common.config.CheckResult; -import org.apache.seatunnel.common.constants.PluginType; -import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; -import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat; +import 
org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorErrorCode; -import org.apache.seatunnel.connectors.seatunnel.file.exception.FileConnectorException; -import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3Conf; -import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3ConfigOptions; -import org.apache.seatunnel.connectors.seatunnel.file.source.BaseFileSource; -import org.apache.seatunnel.connectors.seatunnel.file.source.reader.ReadStrategyFactory; +import org.apache.seatunnel.connectors.seatunnel.file.s3.source.config.MultipleTableS3FileSourceConfig; +import org.apache.seatunnel.connectors.seatunnel.file.source.BaseMultipleTableFileSource; -import com.google.auto.service.AutoService; +public class S3FileSource extends BaseMultipleTableFileSource { -import java.io.IOException; + public S3FileSource(ReadonlyConfig readonlyConfig) { + super(new MultipleTableS3FileSourceConfig(readonlyConfig)); + } -@AutoService(SeaTunnelSource.class) -public class S3FileSource extends BaseFileSource { @Override public String getPluginName() { return FileSystemType.S3.getFileSystemPluginName(); } - - @Override - public void prepare(Config pluginConfig) throws PrepareFailException { - CheckResult result = - CheckConfigUtil.checkAllExists( - pluginConfig, - S3ConfigOptions.FILE_PATH.key(), - S3ConfigOptions.FILE_FORMAT_TYPE.key(), - S3ConfigOptions.S3_BUCKET.key()); - if (!result.isSuccess()) { - throw new FileConnectorException( - SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED, - String.format( - "PluginName: %s, PluginType: %s, Message: %s", - getPluginName(), PluginType.SOURCE, result.getMsg())); - } - String path = pluginConfig.getString(S3ConfigOptions.FILE_PATH.key()); - hadoopConf = S3Conf.buildWithConfig(pluginConfig); - readStrategy = - ReadStrategyFactory.of( - 
pluginConfig.getString(S3ConfigOptions.FILE_FORMAT_TYPE.key())); - readStrategy.setPluginConfig(pluginConfig); - readStrategy.init(hadoopConf); - try { - filePaths = readStrategy.getFileNamesByPath(path); - } catch (IOException e) { - String errorMsg = String.format("Get file list from this path [%s] failed", path); - throw new FileConnectorException( - FileConnectorErrorCode.FILE_LIST_GET_FAILED, errorMsg, e); - } - // support user-defined schema - FileFormat fileFormat = - FileFormat.valueOf( - pluginConfig - .getString(S3ConfigOptions.FILE_FORMAT_TYPE.key()) - .toUpperCase()); - // only json text csv type support user-defined schema now - if (pluginConfig.hasPath(TableSchemaOptions.SCHEMA.key())) { - switch (fileFormat) { - case CSV: - case TEXT: - case JSON: - case EXCEL: - case XML: - SeaTunnelRowType userDefinedSchema = - CatalogTableUtil.buildWithConfig(pluginConfig).getSeaTunnelRowType(); - readStrategy.setSeaTunnelRowTypeInfo(userDefinedSchema); - rowType = readStrategy.getActualSeaTunnelRowTypeInfo(); - break; - case ORC: - case PARQUET: - throw new FileConnectorException( - CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION, - "SeaTunnel does not support user-defined schema for [parquet, orc] files"); - default: - // never got in there - throw new FileConnectorException( - CommonErrorCodeDeprecated.ILLEGAL_ARGUMENT, - "SeaTunnel does not supported this file format"); - } - } else { - if (filePaths.isEmpty()) { - // When the directory is empty, distribute default behavior schema - rowType = CatalogTableUtil.buildSimpleTextSchema(); - return; - } - try { - rowType = readStrategy.getSeaTunnelRowTypeInfo(filePaths.get(0)); - } catch (FileConnectorException e) { - String errorMsg = - String.format("Get table schema from file [%s] failed", filePaths.get(0)); - throw new FileConnectorException( - CommonErrorCodeDeprecated.TABLE_SCHEMA_GET_FAILED, errorMsg, e); - } - } - } } diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java index ebd752fbf092..93d879e559cb 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/S3FileSourceFactory.java @@ -19,9 +19,12 @@ import org.apache.seatunnel.api.configuration.util.OptionRule; import org.apache.seatunnel.api.source.SeaTunnelSource; +import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.table.catalog.schema.TableSchemaOptions; +import org.apache.seatunnel.api.table.connector.TableSource; import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.api.table.factory.TableSourceFactory; +import org.apache.seatunnel.api.table.factory.TableSourceFactoryContext; import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSourceConfigOptions; import org.apache.seatunnel.connectors.seatunnel.file.config.FileFormat; import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; @@ -29,6 +32,7 @@ import com.google.auto.service.AutoService; +import java.io.Serializable; import java.util.Arrays; @AutoService(Factory.class) @@ -38,6 +42,12 @@ public String factoryIdentifier() { return FileSystemType.S3.getFileSystemPluginName(); } + @Override + public + TableSource createSource(TableSourceFactoryContext context) { + return () -> (SeaTunnelSource) new S3FileSource(context.getOptions()); + } + @Override public OptionRule optionRule() { return OptionRule.builder() diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/config/MultipleTableS3FileSourceConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/config/MultipleTableS3FileSourceConfig.java new file mode 100644 index 000000000000..ac9114d9e465 --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/config/MultipleTableS3FileSourceConfig.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.file.s3.source.config; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.connectors.seatunnel.file.config.BaseFileSourceConfig; +import org.apache.seatunnel.connectors.seatunnel.file.config.BaseMultipleTableFileSourceConfig; + +public class MultipleTableS3FileSourceConfig extends BaseMultipleTableFileSourceConfig { + + public MultipleTableS3FileSourceConfig(ReadonlyConfig s3FileSourceRootConfig) { + super(s3FileSourceRootConfig); + } + + @Override + public BaseFileSourceConfig getBaseSourceConfig(ReadonlyConfig readonlyConfig) { + return new S3FileSourceConfig(readonlyConfig); + } +} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/config/S3FileSourceConfig.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/config/S3FileSourceConfig.java new file mode 100644 index 000000000000..e9767bf18a5f --- /dev/null +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/source/config/S3FileSourceConfig.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.file.s3.source.config; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.connectors.seatunnel.file.config.BaseFileSourceConfig; +import org.apache.seatunnel.connectors.seatunnel.file.config.FileSystemType; +import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf; +import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3HadoopConf; + +import lombok.Getter; + +@Getter +public class S3FileSourceConfig extends BaseFileSourceConfig { + + private static final long serialVersionUID = 1L; + + @Override + public HadoopConf getHadoopConfig() { + return S3HadoopConf.buildWithReadOnlyConfig(getBaseFileSourceConfig()); + } + + @Override + public String getPluginName() { + return FileSystemType.S3.getFileSystemPluginName(); + } + + public S3FileSourceConfig(ReadonlyConfig readonlyConfig) { + super(readonlyConfig); + } +} diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/HiveOnS3Conf.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/HiveOnS3Conf.java index 01fede7517cf..be58932f9cdf 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/HiveOnS3Conf.java +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/config/HiveOnS3Conf.java @@ -19,10 +19,10 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.connectors.seatunnel.file.config.HadoopConf; -import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3Conf; import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3ConfigOptions; +import 
org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3HadoopConf; -public class HiveOnS3Conf extends S3Conf { +public class HiveOnS3Conf extends S3HadoopConf { protected static final String S3_SCHEMA = "s3"; // The emr of amazon on s3 use this EmrFileSystem as the file system protected static final String HDFS_S3_IMPL = "com.amazon.ws.emr.hadoop.fs.EmrFileSystem"; @@ -43,7 +43,7 @@ protected String switchHdfsImpl() { } public static HadoopConf buildWithReadOnlyConfig(ReadonlyConfig readonlyConfig) { - S3Conf s3Conf = (S3Conf) S3Conf.buildWithReadOnlyConfig(readonlyConfig); + S3HadoopConf s3Conf = (S3HadoopConf) S3HadoopConf.buildWithReadOnlyConfig(readonlyConfig); String bucketName = readonlyConfig.get(S3ConfigOptions.S3_BUCKET); if (bucketName.startsWith(DEFAULT_SCHEMA)) { s3Conf.setSchema(DEFAULT_SCHEMA); diff --git a/seatunnel-connectors-v2/connector-hive/src/test/java/org/apache/seatunnel/connectors/seatunnel/hive/storage/S3StorageTest.java b/seatunnel-connectors-v2/connector-hive/src/test/java/org/apache/seatunnel/connectors/seatunnel/hive/storage/S3StorageTest.java index 52edf0fb4d91..ed475f74ed11 100644 --- a/seatunnel-connectors-v2/connector-hive/src/test/java/org/apache/seatunnel/connectors/seatunnel/hive/storage/S3StorageTest.java +++ b/seatunnel-connectors-v2/connector-hive/src/test/java/org/apache/seatunnel/connectors/seatunnel/hive/storage/S3StorageTest.java @@ -44,7 +44,9 @@ public class S3StorageTest { put( S3ConfigOptions.S3A_AWS_CREDENTIALS_PROVIDER .key(), - "provider"); + S3ConfigOptions.S3aAwsCredentialsProvider + .InstanceProfileCredentialsProvider + .getProvider()); put( S3ConfigOptions.FS_S3A_ENDPOINT.key(), "http://s3.ap-northeast-1.amazonaws.com"); @@ -65,7 +67,9 @@ public class S3StorageTest { put( S3ConfigOptions.S3A_AWS_CREDENTIALS_PROVIDER .key(), - "testProvider"); + S3ConfigOptions.S3aAwsCredentialsProvider + .InstanceProfileCredentialsProvider + .getProvider()); put(S3ConfigOptions.FS_S3A_ENDPOINT.key(), "test"); } }); diff 
--git a/seatunnel-connectors-v2/connector-s3-redshift/src/main/java/org/apache/seatunnel/connectors/seatunnel/redshift/sink/S3RedshiftSink.java b/seatunnel-connectors-v2/connector-s3-redshift/src/main/java/org/apache/seatunnel/connectors/seatunnel/redshift/sink/S3RedshiftSink.java index 6b954b63c482..28a8e2504297 100644 --- a/seatunnel-connectors-v2/connector-s3-redshift/src/main/java/org/apache/seatunnel/connectors/seatunnel/redshift/sink/S3RedshiftSink.java +++ b/seatunnel-connectors-v2/connector-s3-redshift/src/main/java/org/apache/seatunnel/connectors/seatunnel/redshift/sink/S3RedshiftSink.java @@ -21,14 +21,15 @@ import org.apache.seatunnel.api.common.PrepareFailException; import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.sink.SeaTunnelSink; import org.apache.seatunnel.api.sink.SinkAggregatedCommitter; import org.apache.seatunnel.common.config.CheckConfigUtil; import org.apache.seatunnel.common.config.CheckResult; import org.apache.seatunnel.common.constants.PluginType; import org.apache.seatunnel.connectors.seatunnel.file.hdfs.sink.BaseHdfsFileSink; -import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3Conf; import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3ConfigOptions; +import org.apache.seatunnel.connectors.seatunnel.file.s3.config.S3HadoopConf; import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileAggregatedCommitInfo; import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileCommitInfo; import org.apache.seatunnel.connectors.seatunnel.redshift.commit.S3RedshiftSinkAggregatedCommitter; @@ -66,7 +67,7 @@ public void prepare(Config pluginConfig) throws PrepareFailException { getPluginName(), PluginType.SINK, checkResult.getMsg())); } this.pluginConfig = pluginConfig; - hadoopConf = S3Conf.buildWithConfig(pluginConfig); + hadoopConf = 
S3HadoopConf.buildWithReadOnlyConfig(ReadonlyConfig.fromConfig(pluginConfig)); } @Override diff --git a/seatunnel-dist/pom.xml b/seatunnel-dist/pom.xml index e528fa47347c..63fa1f7868f5 100644 --- a/seatunnel-dist/pom.xml +++ b/seatunnel-dist/pom.xml @@ -100,7 +100,7 @@ 3.1.0 - 3.0.0 + 3.1.4 2.4.7 3.1.4 1.11.271 diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/pom.xml new file mode 100644 index 000000000000..e33b6273a893 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/pom.xml @@ -0,0 +1,77 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-connector-v2-e2e + ${revision} + + + connector-file-s3-e2e + SeaTunnel : E2E : Connector V2 : File S3 + + 3.1.4 + + + + + org.apache.seatunnel + connector-file-s3 + ${project.version} + test + + + org.apache.seatunnel + connector-fake + ${project.version} + test + + + org.apache.hadoop + hadoop-aws + ${hadoop-aws.version} + test + + + com.amazonaws + aws-java-sdk-bundle + 1.11.271 + test + + + org.apache.seatunnel + seatunnel-hadoop3-3.1.4-uber + ${project.version} + optional + test + + + org.apache.avro + avro + + + + + org.apache.seatunnel + connector-assert + ${project.version} + test + + + + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3FileIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3FileIT.java new file mode 100644 index 000000000000..b2ba903518ed --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3FileIT.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.e2e.connector.file.s3; + +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; +import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.container.TestHelper; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; +import org.apache.seatunnel.e2e.common.util.ContainerUtil; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; + +import io.airlift.compress.lzo.LzopCodec; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +@Disabled("have no s3 environment to run this test") +public class S3FileIT extends TestSuiteBase { + + public static final String S3_SDK_DOWNLOAD = + "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.271/aws-java-sdk-bundle-1.11.271.jar"; + public static final String HADOOP_S3_DOWNLOAD = + "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.1.4/hadoop-aws-3.1.4.jar"; + + @TestContainerExtension + private final 
ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/s3/lib && cd /tmp/seatunnel/plugins/s3/lib && curl -O " + + S3_SDK_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/plugins/s3/lib && curl -O " + + HADOOP_S3_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + }; + + /** Copy data files to s3 */ + @TestTemplate + public void testS3FileReadAndWrite(TestContainer container) + throws IOException, InterruptedException { + // Copy test files to s3 + S3Utils s3Utils = new S3Utils(); + try { + s3Utils.uploadTestFiles( + "/json/e2e.json", + "test/seatunnel/read/json/name=tyrantlucifer/hobby=coding/e2e.json", + true); + Path jsonLzo = convertToLzoFile(ContainerUtil.getResourcesFile("/json/e2e.json")); + s3Utils.uploadTestFiles( + jsonLzo.toString(), "test/seatunnel/read/lzo_json/e2e.json", false); + s3Utils.uploadTestFiles( + "/text/e2e.txt", + "test/seatunnel/read/text/name=tyrantlucifer/hobby=coding/e2e.txt", + true); + s3Utils.uploadTestFiles( + "/text/e2e_delimiter.txt", "test/seatunnel/read/text_delimiter/e2e.txt", true); + s3Utils.uploadTestFiles( + "/text/e2e_time_format.txt", + "test/seatunnel/read/text_time_format/e2e.txt", + true); + Path txtLzo = convertToLzoFile(ContainerUtil.getResourcesFile("/text/e2e.txt")); + s3Utils.uploadTestFiles( + txtLzo.toString(), "test/seatunnel/read/lzo_text/e2e.txt", false); + s3Utils.uploadTestFiles( + "/excel/e2e.xlsx", + "test/seatunnel/read/excel/name=tyrantlucifer/hobby=coding/e2e.xlsx", + true); + s3Utils.uploadTestFiles( + "/orc/e2e.orc", + "test/seatunnel/read/orc/name=tyrantlucifer/hobby=coding/e2e.orc", + true); + s3Utils.uploadTestFiles( + "/parquet/e2e.parquet", + "test/seatunnel/read/parquet/name=tyrantlucifer/hobby=coding/e2e.parquet", + true); + 
s3Utils.uploadTestFiles( + "/excel/e2e.xlsx", + "test/seatunnel/read/excel_filter/name=tyrantlucifer/hobby=coding/e2e_filter.xlsx", + true); + s3Utils.createDir("tmp/fake_empty"); + } finally { + s3Utils.close(); + } + + TestHelper helper = new TestHelper(container); + + helper.execute("/excel/fake_to_s3_excel.conf"); + helper.execute("/excel/s3_excel_to_assert.conf"); + helper.execute("/excel/s3_excel_projection_to_assert.conf"); + // test write s3 text file + helper.execute("/text/fake_to_s3_file_text.conf"); + helper.execute("/text/s3_file_text_lzo_to_assert.conf"); + helper.execute("/text/s3_file_delimiter_assert.conf"); + helper.execute("/text/s3_file_time_format_assert.conf"); + // test read skip header + helper.execute("/text/s3_file_text_skip_headers.conf"); + // test read s3 text file + helper.execute("/text/s3_file_text_to_assert.conf"); + // test read s3 text file with projection + helper.execute("/text/s3_file_text_projection_to_assert.conf"); + // test write s3 json file + helper.execute("/json/fake_to_s3_file_json.conf"); + // test read s3 json file + helper.execute("/json/s3_file_json_to_assert.conf"); + helper.execute("/json/s3_file_json_lzo_to_console.conf"); + // test write s3 orc file + helper.execute("/orc/fake_to_s3_file_orc.conf"); + // test read s3 orc file + helper.execute("/orc/s3_file_orc_to_assert.conf"); + // test read s3 orc file with projection + helper.execute("/orc/s3_file_orc_projection_to_assert.conf"); + // test write s3 parquet file + helper.execute("/parquet/fake_to_s3_file_parquet.conf"); + // test read s3 parquet file + helper.execute("/parquet/s3_file_parquet_to_assert.conf"); + // test read s3 parquet file with projection + helper.execute("/parquet/s3_file_parquet_projection_to_assert.conf"); + // test read filtered s3 file + helper.execute("/excel/s3_filter_excel_to_assert.conf"); + + // test read empty directory + helper.execute("/json/s3_file_to_console.conf"); + helper.execute("/parquet/s3_file_to_console.conf"); + } + + 
private Path convertToLzoFile(File file) throws IOException { + LzopCodec lzo = new LzopCodec(); + Path path = Paths.get(file.getAbsolutePath() + ".lzo"); + OutputStream outputStream = lzo.createOutputStream(Files.newOutputStream(path)); + outputStream.write(Files.readAllBytes(file.toPath())); + outputStream.close(); + return path; + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3FileWithMultipleTableIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3FileWithMultipleTableIT.java new file mode 100644 index 000000000000..34fd443146ca --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3FileWithMultipleTableIT.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.e2e.connector.file.s3; + +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; +import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.container.TestHelper; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; + +import java.io.IOException; + +@Disabled("have no s3 environment to run this test") +public class S3FileWithMultipleTableIT extends TestSuiteBase { + + public static final String S3_SDK_DOWNLOAD = + "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.11.271/aws-java-sdk-bundle-1.11.271.jar"; + public static final String HADOOP_S3_DOWNLOAD = + "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.1.4/hadoop-aws-3.1.4.jar"; + + @TestContainerExtension + private final ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/s3/lib && cd /tmp/seatunnel/plugins/s3/lib && curl -O " + + S3_SDK_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/plugins/s3/lib && curl -O " + + HADOOP_S3_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/lib && curl -O " + S3_SDK_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + + extraCommands = + container.execInContainer( + "bash", + "-c", + "cd /tmp/seatunnel/lib && curl -O " + HADOOP_S3_DOWNLOAD); + Assertions.assertEquals(0, extraCommands.getExitCode()); + }; + + /** Copy data files to s3 */ + 
@TestTemplate + public void addTestFiles(TestContainer container) throws IOException, InterruptedException { + // Copy test files to s3 + S3Utils s3Utils = new S3Utils(); + try { + s3Utils.uploadTestFiles( + "/json/e2e.json", + "test/seatunnel/read/json/name=tyrantlucifer/hobby=coding/e2e.json", + true); + s3Utils.uploadTestFiles( + "/text/e2e.txt", + "test/seatunnel/read/text/name=tyrantlucifer/hobby=coding/e2e.txt", + true); + s3Utils.uploadTestFiles( + "/excel/e2e.xlsx", + "test/seatunnel/read/excel/name=tyrantlucifer/hobby=coding/e2e.xlsx", + true); + s3Utils.uploadTestFiles( + "/orc/e2e.orc", + "test/seatunnel/read/orc/name=tyrantlucifer/hobby=coding/e2e.orc", + true); + s3Utils.uploadTestFiles( + "/parquet/e2e.parquet", + "test/seatunnel/read/parquet/name=tyrantlucifer/hobby=coding/e2e.parquet", + true); + s3Utils.createDir("tmp/fake_empty"); + } finally { + s3Utils.close(); + } + } + + @TestTemplate + public void testFakeToS3FileInMultipleTableMode_text(TestContainer testContainer) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(testContainer); + helper.execute("/text/fake_to_s3_file_with_multiple_table.conf"); + } + + @TestTemplate + public void testS3FileReadAndWriteInMultipleTableMode_excel(TestContainer container) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(container); + helper.execute("/excel/s3_excel_to_assert_with_multipletable.conf"); + } + + @TestTemplate + public void testS3FileReadAndWriteInMultipleTableMode_json(TestContainer container) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(container); + helper.execute("/json/s3_file_json_to_assert_with_multipletable.conf"); + } + + @TestTemplate + public void testS3FileReadAndWriteInMultipleTableMode_orc(TestContainer container) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(container); + helper.execute("/orc/s3_file_orc_to_assert_with_multipletable.conf"); + } + 
+ @TestTemplate + public void testS3FileReadAndWriteInMultipleTableMode_parquet(TestContainer container) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(container); + helper.execute("/parquet/s3_file_parquet_to_assert_with_multipletable.conf"); + } + + @TestTemplate + public void testS3FileReadAndWriteInMultipleTableMode_text(TestContainer container) + throws IOException, InterruptedException { + TestHelper helper = new TestHelper(container); + helper.execute("/text/s3_file_text_to_assert_with_multipletable.conf"); + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3Utils.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3Utils.java new file mode 100644 index 000000000000..63789b0d2816 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3Utils.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.e2e.connector.file.s3; + +import org.apache.seatunnel.e2e.common.util.ContainerUtil; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectRequest; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.InputStream; + +public class S3Utils { + private static Logger logger = LoggerFactory.getLogger(S3Utils.class); + private static final String ACCESS_KEY = "XXXXXX"; + private static final String SECRET_KEY = "AWS_XXXX"; + private static final String REGION = "cn-north-1"; + private static final String ENDPOINT = + "s3.cn-north-1.amazonaws.com.cn"; // For example, "https://s3.amazonaws.com" + private String bucket = "ws-package"; + + private final AmazonS3 s3Client; + + public S3Utils() { + BasicAWSCredentials credentials = new BasicAWSCredentials(ACCESS_KEY, SECRET_KEY); + + this.s3Client = + AmazonS3ClientBuilder.standard() + .withCredentials(new AWSStaticCredentialsProvider(credentials)) + .withEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(ENDPOINT, REGION)) + .build(); + } + + public void uploadTestFiles( + String filePath, String targetFilePath, boolean isFindFromResource) { + File resourcesFile = null; + if (isFindFromResource) { + resourcesFile = ContainerUtil.getResourcesFile(filePath); + } else { + resourcesFile = new File(filePath); + } + s3Client.putObject(bucket, targetFilePath, resourcesFile); + } + + public void createDir(String dir) { + ObjectMetadata metadata = new ObjectMetadata(); + metadata.setContentLength(0); + InputStream emptyContent = new ByteArrayInputStream(new byte[0]); + PutObjectRequest 
putObjectRequest = + new PutObjectRequest(bucket, dir, emptyContent, metadata); + s3Client.putObject(putObjectRequest); + } + + public void close() { + if (s3Client != null) { + s3Client.shutdown(); + } + } + + public static void main(String[] args) { + S3Utils s3Utils = new S3Utils(); + s3Utils.uploadTestFiles( + "/Users/gaojun/workspace/seatunnel/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/e2e.xlsx", + "test/test/seatunnel/e2e.xlsx", + false); + + s3Utils.createDir("test/test1"); + s3Utils.uploadTestFiles( + "/Users/gaojun/workspace/seatunnel/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/e2e.xlsx", + "test/test1/seatunnel/e2e.xlsx", + false); + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/e2e.xlsx b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/e2e.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..87d363d7db3bbe23b416ef1297979146b04423ea GIT binary patch literal 5823 zcmaJ_bzGF~wxyc^1f)S41PSSu?nYve5{8DMTRKFBPDyD=k(LfA0V(NbC~093kh;V7 zoqObZ&-wPuJ2QX0d;gxjo;~kc&(csvMj=ALz`#J@seG=Ba4!hqzYV3qj&2}FHxnIi zCy=WVhnItWo%Wz(7Z?6ja{u&WewfB{;vi|7zK10+#lW;tI-J-~RM<&UZRRq5we=JL*)9UEcSJ@C%X7$l8$-|S-gEj>L zm0s)UgtuCeb<^^Br^a#2RmkYRgg?SD%6*pNzEo03#Ygf`#$XS=-C45c7_dIqs~xSE z3j)|~ybqPuoP4*N_ByRP=+)OxDi_a%9}vl86TS&9xL{n0`1n3mYE&8~JM1*3i zCP#LDko!b%p7rd-4274tSoy&-;?8ZSH3c-fZvTc?d6hEg@A z6AM0+GKc=M{jG~Tk}~5-!5GG`N_>SOwaXz?h~%Mb#>_-rbBvWL>;MQK_aT z)RC*Byz(8<@p6=9=cq~*zex40a7oY?<+bBpHZBugoniR{ej(qrv+CzL6Axc(1N{2$ zTTO7k9dob?=&m1!xOt_|Ttu<~h3lU%b&9_*i=6}gwc zYh#?Ex(1HZzolg$?!fbKIFjmc*b$UHQ~IYZasfYpU|Pds3a8BCAc7D zJ{E@f>H*&zkD1+ggeh4diP+8$Fpu zO(+sBee2YZ{DlEa|GesB{Go-3u~CEJIFYwzk$3#V+CoN=Po^8QuBh#sR;$yqjhjy& zj+<9Ex4->#`YBj~Pax+G!**h^DpdI2r3!xicMOT(7<$=rx>}pLfGo7#Tx=YzT<>6X zPqovgQzQ&)Z*LD?;d!~pSqlx14t^7i!P3P2LoNiD*X0oHbL91*Z-TQY9U8x7@V2%a 
zv4MelHv7oUd77F_hF3Dv$!votXSE+k_URUQ!2bNnvW=eFi<{V!+pI#Hl--f(nmt3~ z)#OulgG%V}2gLHH%IzES2K6ji*+tiyqS51Pegq>&!=`QNa%AhaUg;dV&ajl2HNgxu zHB|FWH*&_Bs|$nl%pB0x^{sk-&#aAyzlRd?Esk8-^I8!~wHjh1G0MZ?twh0eD*t%K z8l$I!G3C6`Oo{o+lvI0+-TBav0a4+nSNtYtOh4j59tN4z-eO;3TCN_R5z2VmYflIc z=Y5ZFso0!fMh{r07y%IgTz z4OV7dKU>x_Ff!iD3n{F4vMwkPmU#kO+3uzpx{jv-bU6p(Lv0o+9<5g`eBsEw%$nW@ zG&MH|+!j0$kC?RDHn?n>4t10n6Af#djhSbKZB+_Wuis+btQ-5#KhZAjc+0)SW%W_4 zvXNkcOM695m@Ul$r}?R|VJGS(bqgd>k&jd+kryC{7pl3i8pPw6ti!IMD+t1!(&-Xm zOd<+mc^ZSm?a64g@xr zC_@|Fi&e-*YALHIO%0U-OSuvaG;Byr>EZ(2^np$zG%P zMyE%g?Lf8i*?4L~89F8sfGK_iPB$U5j3%qWN|6`N7CbyUFKGvqIHk_k9+ z$~8o|k>*K_MtnvmyzP{~{`K-kPR7KjxVHQ3i**CTV3~N&I?1We2c?^*3VC?PQHsSG zaZ5!cMx}wnYjr0`*NwP(8Dcc1={#0}g-J6s9`Y%je6R3X<^r8Jy(ERdi{T?trq+!d ztPG0QqLIkWKJzxMaGne5(n({#$?c%R8c6VbmrY8TvZ5h-*om?bXj7(X;d|ImKDKop zh3i45gMd8sl^%(Gjc+2ojBe<)w__`UZPkOuNfPp%&_PNP#>NO!c}TYQF1cC^gjYvrSxMIKc{Gj&&Q_{}qS1~tNt`fZ1cZh5#0HxPR z`~ApfNm;ug71m z9?#GvI6QDPriISPCLa+B`>-)vyc%%HmL28H&j4bGWkCA)kn$67F$!Qjb_~8;`23}0 z@)~hRj*(DT)Cwverg1#E%#;@{=;*6bT|$K`%ORG{uI?A=h6I8h5pMwH^b7@E2A$8+ z!iT%!MvXPsN$HmZbNA-4LOWd^W)Bu}`KkAAmXa^|Wz~qRKJHKnk|Ij!)`|1pp_Lsb zlMM5~Ak<2;YA-q`EOJ#zP2*(sPHWoP1Znq9mCalFN%Kt{PQEXLh#yExZEUX9slOjO zEWPG9UDvHp=HOWr?*;h;=6uDTTRWAQC z!HQNexT3EIX}HG^k3UEKFjc}3OSu1LF;KWv-WTaH^|hi%7yS#Cox~quN(n@ z35>FEgEAdSX?ZMK8vShY$opvbR|j$$RwcQVxdJrx^rn{( zfWPsH^9`GG@p<>!75I)diCREfFf;^&Fr0s4P27L6rkl4t=r6jgOHx(8lbMHCZ&=Ra zx!(ns>;n?fC|8gaUexq1(j99uk^wJH1y#wi5NHv1dt#tx9}}B~tqd#ru?0CRShoj$ zq&?tDCLS-x`gLAKYBXx#opd?+xX}a|B^U{c-2}*e`*gF6&-x(N0U6EHc9$2g(T#1B z0{z7~%lI4BUMR$d1NVt*%@{V7Xvto0XmIv{6vJDJpb87CB!-0rEEemik?o%z3@9Lz z&76Q#^~w*$K=syJxwL)kh}m**-H@G~xUhIGtH$O{a*I<~yq7!6`||1-1$t9#jP`9M z7hG1mugNcj$wK(1wdz(qNe7PLp?<}ui+xFM#!EXO}IU6p8uKw}# zfcN*tdV*c-UV_1PcSY;|H^-ablDR0K%Gg?a9kNQtfj*$%##T z+z~%v%L}$Fp7rn1%!(IWx5%9%)kyO@kP-%ZEw?X;chTMO#=i({84IJD&EnTMr;#(lf zx^J_ORyL#8H8#_gF}8=peidD#-3_}O+G6T7ykoOL}mAF$$8?ckID z?DoyrE9db}(tJlw^U>X@YXoyPXKOk#<;(5%oaXMSXXqj@2FQ^wUhR_q*F^6=I{L(r 
zN}J9{s4?`o0OK=tfiqJXFL{wTR!-znWW_d9zcH=Bf|8XcVau!7luLh{ z^Zr!_xp8cd=D?6v${OLQF=*)Yhx4MF!uIRQ04E^@sc+ieREu@W{wT$y&5b`dvG1@6 zM;3|y2Jf8bA5T=Izvso(8U%84g)3?J7x&%ZgryF+%nRa&AD$4gFQKE1>T4%xDYA2Z zBvu{vC}y-RQOJvWxhHzV%P24H(G5z)EBFP@n>{~5rhIkvX&E%~+;w3p!nrbZsAi|? zed>I-Pv-5(+36wet>A?NOiN%nHlx&*=NnB7)Ni6*o#xG270ItS5zRKR15Hsml%Sw+Xu;TdLNPZ{>W8_%s+buw8KNY1m5XJ8d8U zGdBu2bUa9bQnJ|(_O+K9)mgK(fHP>Gry5nkcD4BecxS7D$r2v>Ij4gTbw*&IpJo*J zmtB8Z^%Lg&BL4w$h-pyN!CGQhQ11o7VGgs4jwlpAYIywyGxs2E{@%C1j`k)+QM9rQd1N!(AQ=a<6vwz( z&O~C~f9m=R{(hcCHPZePU^BNB8VWm5mXZQ`xu=L`GdxD^hLsD#h{(t2YclX{pvaSp zPFW8S-qmSyF;2K;c)iyE21RuSMe53e_I2XlwnJ?m)g~H;U@{?*j{0LIkz^Cg;s|Eq z$ti)ernKVq7nd9P0B(>>}$Me9~3@5v*fY5Kc zlG`^5y`q}VDiataDXN41A^M7M%CG4{jy3SR!tO@6H=w7$uY(} zl%M7>sOY3XjB4-e$O+7NnQRFF&YlmF&<{JBSnl;(on*sip+pcl?A`LzN;~$qhu(Q>0_9^bOnF5kfaX$C72LGB zQ`SpiV@`B*A@Na3D1$)%Zg*q-!0nK5*4&+xsF0&Cv=9Zw&*AySE)Ts^2sP1)rL8(g zRi9XKYqoPe6vo1lQIQ~EAJ9LGMANq7WhD)Acj{$B*n@PXSKvMD(;IUpR}Gd!ZS3f5 z@u9Ell^Fo*=eLep8vr!yR21IxF)@52-*2X<+fYHskB{P2=&p$0O?qvd?r3htTl5)7 zhA{=LCl29zCm0>66)HdwJBvo50Q+LycV?3flBs$f9F3R@5q#j;D-%${Glv3cyx<|# zhKA_Py%Spl@|{m*%4WPALmP8P!y{|7jSL$mByL}Y9CYPDg}#vyKG=c4H$m=n$nYpeS8HiW8LSce6MzhsdJ4d-3|E`sSd6wu3Fj__DY}oYHb*} z-h37_YB-&+b36<4yLgjxg?{(^IfYMp@Y`#=?}XiXjsG|PgY$U5 zz@1@s-!8f{%zlR-oC@yQXMZN%H)ihKq~DPSe@_1CE&Z8&-;%g9Z+?dr^1bZ;wQ~MU zzpu^ie2d>v1_$atxfp*|xGw_kG~Dl4g~M=H;opkx&(!-|d8ZP7M-#jfJoP>^{+WEA zOz!CPcYFo>mHc1G_0Kx@hvJTUe#Z`cP5)ZN|05y|WmLGLML@uWza-$>WcwF*{{u*V B1w#M; literal 0 HcmV?d00001 diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/fake_to_s3_excel.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/fake_to_s3_excel.conf new file mode 100644 index 000000000000..3ff0df55d343 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/fake_to_s3_excel.conf @@ -0,0 +1,81 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# 
contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" + "shade.identifier"=aes256 +} + +source { + FakeSource { + result_table_name = "fake" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } +} + +sink { + S3File { + path="/test/seatunnel/sink" + "file_format_type"=excel + "schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST" + "data_save_mode"="APPEND_DATA" + "access_key"="XXXXXXXX" + bucket="s3a://ws-package" + "secret_key"="AWS_XXXX" + "fs.s3a.endpoint"="s3.cn-north-1.amazonaws.com.cn" + 
"fs.s3a.aws.credentials.provider"="org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_projection_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_projection_to_assert.conf new file mode 100644 index 000000000000..0838c32003b6 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_projection_to_assert.conf @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/excel" + result_table_name = "fake" + file_format_type = excel + field_delimiter = ; + read_columns = [c_string, c_boolean] + skip_header_row_number = 1 + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_to_assert.conf new file mode 100644 index 000000000000..f1b9bbb1395e --- /dev/null +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_to_assert.conf @@ -0,0 +1,135 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/excel" + result_table_name = "fake" + file_format_type = excel + field_delimiter = ; + skip_header_row_number = 1 + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = 
bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_to_assert_with_multipletable.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_to_assert_with_multipletable.conf new file mode 100644 index 000000000000..2c1e37a7b6d1 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_excel_to_assert_with_multipletable.conf @@ -0,0 +1,134 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + tables_configs = [ + { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/excel" + file_format_type = excel + field_delimiter = ; + skip_header_row_number = 1 + schema = { + table = "fake01" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + }, + { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/excel" + file_format_type = excel + field_delimiter = ; + skip_header_row_number = 1 + 
schema = { + table = "fake02" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } + ] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + table-names = ["fake01", "fake02"] + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_filter_excel_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_filter_excel_to_assert.conf new file mode 100644 index 000000000000..d7866c21f638 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/s3_filter_excel_to_assert.conf @@ -0,0 +1,136 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/excel_filter" + result_table_name = "fake" + file_format_type = excel + field_delimiter = ; + skip_header_row_number = 1 + file_filter_pattern = "e2e_filter.*" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + 
rule_type = NOT_NULL + } + ] + } + ] + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/e2e.json b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/e2e.json new file mode 100644 index 000000000000..aff56314e7c4 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/e2e.json @@ -0,0 +1,5 @@ +{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}} \ No newline at end of file diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/e2e.json.lzo b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/e2e.json.lzo new file mode 100644 index 0000000000000000000000000000000000000000..26c577d660596a38b15532bcc22957585ef2853b GIT binary patch literal 3466 zcmY*cOK%(38Kq=dcHC&3#z~MAAmC6=q{u`1zUSUi(uNW#k)lLWq_vo%8vJ(@VL~z_ssOd*Owf`RCs4xg5Iq^Mfa~-+$}J7nknpr9Xy3FV2KQ z0}G+h=m(L?@>Y2#;^U$+TPa5TNPcg9Js)u+ot>$rHt@`TuDSrcI59Rg54@3Im@EKq z?9>ke;LXZfeik^nlgSi->uZJ8g~)vuE0_27%AJV+p4Oh{DN2Y-73Yj=p`{CG2oTY( zB1BOkJ?2uOJ>xNe_w54vt-X!r8Wx?8?GP*KY0Y5(<^`1JGz#@dqEcTRf-jOQ_?gfJItq$f{$LoZ9zhe1Z%NX>)+K*`6K zC#G|8tj1!J3#kM@^Fz+~aw1n6o5t7CnADmJCNwj~c*dZ@M0c+}zZfKK69mb*=^#~l z=j4^1L_A0i-R$o{@uV$Yay*T*MSL(}BqN?jC1>J-1`^{`Nku%up*7Bhp!_%CzL{h( zh|lCvJV!;T2pIPX{*uW3``6xj;)G7(nT^>)?0S$+S2w}W(OSE?1iX7MRp6>Fa_1cyYzjo-O*M^=t zp_ix;TvHG!(H~UV?u=ncr8VQ8k<@rrT;klhqNBtF#Q2=4b0;*qJ5wzkSz;QM=@}5S zQaPAN1MjYFP6fb?z5Ir4jj2Mlz6yM_RoJk^w|8?HOLR7wN=yN_j;2;hE>?_SHwXo% z0BJA^VK|{K^$2vPFIXcG0fmnd*VtB8Lcf37{HonM7q4d49^4)J*lF#xzxR7qFI-M%(#x7JAI{Q zwozEvrsqzNwS$MRf!)vzR8?qU<%k2@XQ_x$Vn8{VBT~Suq&|IxoICw2N_D`P&#^>k zbZ)w_YkezsP}`--IV!M!%{;^t)D zc1^CZ+g^=g#n*9QZqQ#IXp<855C3ypL5e1CJ$9~mF-?^gFyhk>zWCAslnmmL z3}vHI^VpfwS4piOEzQO085WKbdG_ki8N`O>)rvv165(V|g*wIz0*NU!j^t9HG0gD^ zNrn4_l^a!`eauqf&}b*UTCi=n7K|mQLBn>bnYQYgY;6^ffYX!h12}Rd=#1wqO=AZ^ z36>N|tu;$c;F*cNoDH|ieo)(WyQm;1%Ahek85#Z=l36%&nOd&^Q#E*@a|h-4OL zH#5K+3tMPRD6O%%whLU(jnyp0g~aMX8h9(aRkoDPb`ERv(NiZhs8Bx-9Okgbfu_CVWeX{&Pd;I8?(qgC`L4rOQm7_7Q|h+v49{O;b+ z+n|jxj}n+XRzRj{`snK4|J9H5hd)WC9{9su=PHlk8TcHGb}%*ABatNC{x`q4-8Vnq z7_ZK65^P3ElxPSJMSh7Ap?&O@DeF0&i`$RR>?KMY;gmKvL$nEzax3myvUxexzx8B*acx@@ql zxE0#YL7~`05_Fwgk8Zv7BVV5UdHD0&J*d5Y)Z1<#7;^0JAu(0I;qu|f&ZQtpw@dR~6UF*>&}lb3DF{eGctW@m zJ7Cz1A$Jto!DcVgRHV!+;7OFMNJG^!j|TduXSPLVIfe9tbEcG}rB5pUTw( zn-0m^L~a&1U)-6rp}E*P9Q?i;x)dL;2X;nZj5p@z@O!qAtIx(BJE5D%N2_X32H8Vp zIKGQ$hucv!t2EZ4(YQ8O;VaxI<(@gc`R3+f?Ylp9P|m?;?ocY+gBUPF$Nm+_5gkHc 
zHMbu-185h{(b(`UjlP#pKKQhK3D$fqv5aCMb9r{~IvKDjXG~Eyu37fMlUkj4voUu|bcJJ@yBXq%0twFXYz~ L-M7E~FW3J7kMdyE literal 0 HcmV?d00001 diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/fake_to_s3_file_json.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/fake_to_s3_file_json.conf new file mode 100644 index 000000000000..3a22956f5402 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/fake_to_s3_file_json.conf @@ -0,0 +1,86 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + FakeSource { + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + S3File { + path = "/tmp/seatunnel/json" + row_delimiter = "\n" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "json" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + "schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST" + "data_save_mode"="APPEND_DATA" + "access_key"="XXXXXX" + bucket="s3a://ws-package" + "secret_key"="AWS_XXXX" + "fs.s3a.endpoint"="s3.cn-north-1.amazonaws.com.cn" + "fs.s3a.aws.credentials.provider"="org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_lzo_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_lzo_to_console.conf new file mode 100644 index 000000000000..df99cd66fb86 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_lzo_to_console.conf @@ 
-0,0 +1,144 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + result_table_name = "fake" + path = "/test/seatunnel/read/lzo_json" + row_delimiter = "\n" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "json" + compress_codec = "lzo" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + 
C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + } +} + +transform { + sql { + source_table_name = "fake" + result_table_name = "sqlresult" + query = "select * from fake where c_string = 'WArEB'" + } +} + +sink { + Assert { + source_table_name = "sqlresult" + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 1 + }, + { + rule_type = MIN_ROW + rule_value = 1 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + equals_to = "WArEB" + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_smallint + field_type = short + field_value = [ + { + equals_to = 15920 + } + ] + }, + { + field_name = c_date + field_type = date + field_value = [ + { + equals_to = "2022-04-27" + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_to_assert.conf new file mode 100644 index 000000000000..7e2491d2f6f1 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_to_assert.conf @@ -0,0 +1,133 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/json" + file_format_type = "json" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + 
rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_to_assert_with_multipletable.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_to_assert_with_multipletable.conf new file mode 100644 index 000000000000..741c26da8e5c --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_json_to_assert_with_multipletable.conf @@ -0,0 +1,130 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + tables_configs = [ + { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/json" + file_format_type = "json" + schema = { + table = "fake01" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + }, + { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/json" + file_format_type = "json" + schema = { + table = "fake02" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = 
float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + } + ] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + table-names = ["fake01", "fake02"] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_to_console.conf new file mode 100644 index 000000000000..e0d0019f17aa --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/json/s3_file_to_console.conf @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/tmp/fake_empty" + file_format_type = "json" + # schema is needed for json type + schema { + + } + } +} + +sink { + Console {} +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/orc/e2e.orc b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/orc/e2e.orc new file mode 100644 index 0000000000000000000000000000000000000000..d50f6bb54dd6af1f3bdd521dcb829d989c503c78 GIT binary patch literal 5730 zcmc&%c~nzp7JqrkgU18m5f&d&NW|KD>?x8EvYnoG*h1Js79a_yqfJ7{0$Cs-5ZW4C z+N!11Dz2lB$4-k=M{C{MTF=yR!M0=7sdcICap~sNx=^QDZLQAtK3*7e)$P6OHm#gWb}d}h{r#eiw+_$`e3)P%+zB|w z8y5!(arTO0UTHE|tUal?Qknr2uQbhOtL{F>*Pc6KTG$s(p7wq_=Ipr=2IuIX{xCS_ z9eA<>$3zw)Qi2P&4T`aT)(u)3as}ZjRJd>H2_*~3;K-F zMx#hy;Bf)F#$Kem-)O{v8;1@KAGRq2&)}xu{TmLhWRY*Izi=ptI{5adcMj7{x_gea zMr$WX#pQ3)*F>2kdSV7^0Op7yUtm!;=o(P)9xfP%1pM?Gl-x6yP4yhUcaZvTUw$oy zy!`3vcN3}WM`v!408at*EMbumz8;f>gLKCh&By@PPaze zrqeY+461V1`zz{OiwoQ~Z(GpSZr4>dG}gK6bbha0X|eg-7OPk9RtFuvHch)lZ3z0> zJ5@H-ZVE%%bvB*b)8?!7>a~?+)w=ekPD4#~d7Y`w(@|CB(1D2BTARmi^cnS45Xr1n zB~9gxo|dwjfU7L1bJljM8|s`DDx0OTx}wxx=5w@#LdswTL_bfn+TR>>*^3Hlm4UL5 z-Ww=4m{d)T#%67!UP&i_Vz=~3ix7*lLRSGHPpfm74616S)(N2x#qRX~YGG%n9-GFw zB)FO089cc%ttZGa?wGzy&R;kF?dwWS`2MI4zbelPXO0~Dg3)_1CWw*x8Otqik!Qo{R>hr&;u{P4N(iLz&JijZl~+_?~* zo%PpUi%*;mNASwjzdC%7|7AD=WH(Oqox1gI_q^=~zPPjdXY`xn`_G-exOc(gtt&Pz z(XZODbnlWw?;N_g`egT+W$4_sv)6Zh_IXd=lApKr9%mi8y?^7b7iRtaXwQ;=xlvM8 zQROTOc4!KH>Xx9@V5zSpm3 
zUGD4TFU$E91*NO6Epurh))cGUenW@XY&1Ko-6eGw#iTNK6#A5bD!P02fLiNlFDbFrch~~f4xPaoa=Y9nr(0<%@w@9iO=dTQ zI-AW=*$^smX*!x43;n9vYFkUKvcz3e=&rOn>mdvlw^+2z`UZ`r-PurHq^>Pfw^W%t zUbow?)3)kuMRl&)hK`boV6k3R9CDYH>)V;};XU>=C`FLk*_)zC(Wmu4v8Rv7eEA)R z=J{@cKuM7=DOzuRwBY5n8967r(_+8cCR8pxbxwSkdcE|An47119zOrYk^av&btV4( zz4N!9pg!*2mbYtLm)*#@*0u4CH^sX@-t(-gaPgVab?YTdpRzgXJFPB#v)uq;)oy61 zG(ltqW4iuVIlm|&Z5n66w2dn_bnYwMc{a#7+BAl@>67=jG(7WxZ~2^#%JZ>K&kWAi ztyizjnYkH3dT#O0E!>TK`-tJG>09R_zdc{Oq3qg8WX*B9c+(pSB5^TM4^7Ixj~KDlM{3iMAKR=@t)-2J`lSN**0YBzTIwR7jbJ>9)}=E}L3 z=07}tU+?Po-{_w+f9dkxALp01iC^sFxb zW`$*$YemO0WssA#LayUKTBQ(FE?y+~`0P*oj>Ny9;BO}Q4+WucC|Ohjlqe;DlFdjC zqsJ&N)W=YKC}WvDPNl z_YmVh3Fe2AED8m77Ku{N0f_?1fqOQyXTv>8a+q@%g+Y4^tPe`YF#1?V;#3T@$H8~7 zNCMu+BIBtkP@h1hLOD=B7MVz?z#ao~Ade&_ZW6O6nK;QX4n`(JJxZo9_EaX%R3`5< zW}n97oz9#a3-d-virHsSjW7=$b03eL{aMUA_{=*5%sU3^%p$X?GWd>c z#&04McOnz-Atufw<{gtLl#AjB_-Hxwi|A+>Sx4;AboYxhns3y2yfFoSr+KR0VfNS^ zW+%}XOAOEy&?gOILo86sCf=8F7*m-;kw4SX_=a&Z;d)S1!&pub z5tac825K~vVSsV~o;RFZ7^&#}97ZUH^m7=Qh|*6$B0xVQO&&KQc`!Ccntm^D_)pUT zWf*MwAYA}W2W079sxa8~C^P|deLV0Ofg;@1^pW_%VACUi1f!7y(DbGU#tV$qQMx`z z2L`)-4-qg@IGg-)25$@y-{8~_ae@;uldHa{g(_`nwS*KgP-e#NGLoWkPPsr&B@Caaco12^IHSnbcaIrY; z^a@02Z1$R=s=?c4X|~a>4K}M2xX@NhC}8vRVt~#Zu<_#Qux^__V0QcHV6EQv>=-Q~u#A-Df2u()rhuTX zV18c}qkwwE77f$~{fC0OVnk3^39ntnx9VS%NVw6AdJJQqKCxvfSA6#eaANWEII=^BI!tq8WH74DT<0ZxAK}O z0v<9VMAH<7E#97XQIx{n?e5V*TiImwo1pC}scx)=)>v0k7=YI0FKcOowy9cQS_rMr zR${4yR@YeX@MLSuJ%BTnm88@$kZTFs79Xj|%x?iNv0mR4U}>OpUO zwY3Hp>+)_f{pssZjEE+3Sqv`wJvtMZn$_OhZgwMv*61xk@XDg*t{!Ns%tmb!wAyZe z6Z-Dg8;a5A@`@r&0kjr#d$Cy>CG&MUq}d90xv|*}CKuPZ%4)z9eO;ggy6f8gdM9)@ z7uzl9Nx8kJ*8;7*#$Ss8FV^~O#W209RZ}gUr_gxRm+; zo-A*Bag`HoEiEX~w1L0FBKnGme)7;Czl{?3+cDxBwZ!(>9qF^*e|5h9_R>ke+qFC{ zqoQeYb3v{CwIw;~Q!AOc^iTKD&BJli%>_rwHyrtUxB9D-iw|sa&b;T$eay0_AFsW* zEjp>+)t|X7u0#Cb(>Ki@PoJ9K{CSsRAC*a${pn0*(zd5)FsG5W(rbW(4rExrviebsO~Mc!M2k7o$Wo+sH{d`i8X-Z8nspL0Kutr4E^f=zAz9W zz?sCbIAR#jqI2kTr{4X)$%65Jv9sglfU_8W&)&X$y9rvviJHF;}oE;Hs{M6)dUl 
zG~ppBvl?390B0@J6}hz->SguSJ`?A{uiOHZ;QGk*6!oHxqMG3M9sE|o59aR@(R)So zpHF_bQ7RCs8L4K2m-4}8>egpBmkbZ(PLlkIK%@-achv*Z-uvpS%9N_7&Z3D~Z^{ z^sSe7y)E1ksfDb%70q8fwMIly`5(r8B$6#3nwxUsF?c-^x$`!5Q%7c|^+5|LK(fF% zA7A)vH>~;iB#VqwU0BiD1u-+!bU4eP^|8U71KB~34mU*CUYcpCk(Dvx7nyaAo>$}a!!S=Gka^&EG3Uxg$k-ghk zsfJcxYxFk5p$j;CUTLSovdmgg4$Rcmt+pcr>52^n(t3HjqrfZuS4ClKi>p&~PnMyn z%3X%*T3YVY=~#bviRtgRzV%AvME**^-!gZ18xp3@SWxGO)~u~4#M57*ZE8jXI~xsF zl4R_fnhJPtQ){%yk)>LV6&>_Cic2xMba@QLkd)-6fU_+sTVACrE5+Q<6euJ+(Op^$ zVUbrF{Cern6!n3Ms(>g)uB&Wq0WV7{EERfqt?2G`Nb42t&8C`G5N>sqXdr=%cUgknIrz?Hp}S!OE1oIe5taGt`R!XGV&qTlPjj4H zytVHS|F$FU(#+iSbN?ZVEM4Wz@yc^Eq;t;nxK`F^^^^aL8d^V~SpU|Go_Ra8Nehqb zW7bYvQAO$Uq;zOPBR?P)7O4EBqZ3ZJTVw7 z-Wh>*w^?l3a&V~DRBXmrnoNdfh;dez*W=U^$6ST2HGrg`XVBNSZrucVgFoo9SsaoG zCS=pO^u~Qd!{9eR;htu0!f7Dai|~6FewX040e+m!v!@SV5z-$oGdPo?=4VsXt8os$I-5;7#}{$k6zoUv3tpAoVD(4~ug{e$0rG?+Hj zC;qxW=9ka?weQbg{i5Ic^TEFN==w$I?v~RFvyIVvL0NH+c z{hV-hmyOhLArzmDn_2a1)!CT9Z&y9O=J(sA2R=3QZ`xscEatOCQOdpFq?j&O?0C|0 zGrBTzwQtURtK3@M8=3jNzG}2ki>|l?#uGWI319>(;{S(BZ=)I2=1YB9@Voq7YhAo6-@n z%HRO0rC@+mc7g#3RxPoN1PdEMx0F;HI3KEzTET#bie*G1L5Bou2>`%yLfDP90(4{P zAne9k0J^a>5Om9e`97AB1yjCYSc176q%f@ux@Ey!4N}2$Ef|)PJPbatX*U!M38q(o z!)z;1^|$B1oI?FF3^ppv~Md;f7}^W%RHZ zjsX=rOLA=rI%2HhWgK1qix+#5@u3tGEncF5Y@arD!{`GfIv z0SCEm4&BT>UI{&ZqT+iG&JB5aY#!e|kPxzVFfjyu>R#TVgGrF+k0ajpkYCfOP+KA$B3IFW`t!Y>IDMoRiv37M^-!|!u?nu0JCz(m;^ ztX{9nVe!D0V&DBvPr&K%v+vO$Tl|g$xxs~|PNm~%VvF5jbAq~rXnG!PNM4SAaU7LRwl(;j zZbygT;%-ZbG4xNLg|UEmkqG*{-SjN-tZ6W+%YSq#vpxRtCDw@z)q|t)9F{6o=8H=jSYGb)Dn#$u_=_xs6ZZ&rol3bQhvUeRw`dxKxa^i zSK$si4kWvmgP=oVm`hMe{$->j9Sc#fFQysgd4hTzuBj2k(4^oAVD~&hdoDuxQbgkM zNqKC}W=H~8Qb{C(vu{G&jX8k(bTXYvC0>DYRvx=3QU-*}9~{7pRsx-D5y3cXA!dAr zEdYc_+YxCn8U4Ny4dLLBLZ^*D9&Y^c0&PpBAAkjd;$Oz;2vQht^o^-tG?r2fsjJZF(3=o&=Mpgb zTq?%(JbnlW8O^ew!07YS!02EN#_)w18Xev%)Nurj?oUIbM--v`8KhRC0=DN?5e$>< zNs2j!t?h(BrxECbG~AwRV?qnYu`d94s># zzV^fbhxZEAyo74jr(=Lm(m}c^g?$X^1*jSl%h)^!ly#CO5hn#K!`8Dx5v3h1m zDUE9zdK01#lh9}2+U}0yH(e4N6j?9}lITAp0(OL1#9C5;Gi*UF#Jh!fr!z1TgYi5y 
za(GdE0wsVCC<|cH1O)laQrLJKGm#+I&EYq42#m|V1kA2%hDqQxmi&+q;4gX|dJ|f? z3xRiKqSsf#fTKfcoJYQb$argD$Q1&SLvKRlK}0@7kUvq4Hkqvlc+gK&0E%}GMuHa* z1P#3jK~E#-`Ybf};9UNEgcgY{EC?=?&yc?k&LzajURfCa9s?SHI7jyiHCeTE!hiywl^GXFpV zI1s!&G5nPZ9SH9gYI-pb9N4}H9T-d;y`RAWQdbd0>~#u)Z)CWIS6xS>V3 zolo66dO0K6!i=C{mOgbagafZ{47tl;I6`ki)MFTqt=VKflXyZ6?Q5>GBfvu<y(fA|TNUPiDkKQ(u1tlU;1Um;(?)Le(bGVBm!`~(`@Zr5eW#6H) zmli|dl`kYKqZ4Jy-z=B`SIuPRCv6sAd#A&nYU%QMQ#)+U4!4EbKYgXjA82!^a#cQZ zTT&&fJQg>;kGe*|K%>*)vUjL*A6yxnLoQV?!a_W#@^rdfs$?D#Zd@=(?ocSk4XfA- zlIo$=4<)PYjz&wT%dg5s(bXWzY3D9l<71OM7YsZ74z~~-E=qSo=Jby?o40EstGIhy zm|d#iz=S?=1!cH2v3erC!TaG|h$42FuXj|)R)OJyF0ZFaHKCK_dKxasc26kD7IERC zT>pf!rv%q<|L~+z%-P|nb;=&WgY`PWVXC zp_*6?6hz}I7|k-I>VxUw+(IhB@qvB>w{X?k_}F*NEx|%94Ei777QQG87ybvhh5rT; zE`5ii9xAi$LKHrv+=UcdD1{5&iCehBCtUE3+!Fk-6E1pZZsDqxaM3$*3tO0k3H~^4 z;lDVIk2{K6gcAQ8ZV?nQ<6CvN+%iU;@X%_CncnS#-0w?ud+iQaD$*z;!P16ebX=vA z;`YUNIef^I@#(4Q8L64^N!CuM%O2mDW4Af%={Xs;EL(b`6{t7cX0v9d=PX*3x!AhM fvM}9|?XYKC?}M*&@B#Fn^pAQSzF?Qb|Ni=abvwAB literal 0 HcmV?d00001 diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/fake_to_s3_file_parquet.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/fake_to_s3_file_parquet.conf new file mode 100644 index 000000000000..8738d35db508 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/fake_to_s3_file_parquet.conf @@ -0,0 +1,87 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + FakeSource { + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + S3File { + "schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST" + "data_save_mode"="APPEND_DATA" + "access_key"="XXXXXX" + bucket="s3a://ws-package" + "secret_key"="AWS_XXXX" + "fs.s3a.endpoint"="s3.cn-north-1.amazonaws.com.cn" + "fs.s3a.aws.credentials.provider"="org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + path = "/tmp/seatunnel/parquet" + row_delimiter = "\n" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "parquet" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + compress_codec = "gzip" + } +} \ No newline at end of file diff 
--git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_projection_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_projection_to_assert.conf new file mode 100644 index 000000000000..6d5e7cc8850f --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_projection_to_assert.conf @@ -0,0 +1,82 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/parquet" + file_format_type = "parquet" + read_columns = [c_string, c_boolean, c_double] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_to_assert.conf new file mode 100644 index 000000000000..db3619a33f6c --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_to_assert.conf @@ -0,0 +1,99 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/parquet" + file_format_type = "parquet" + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_to_assert_with_multipletable.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_to_assert_with_multipletable.conf new file mode 100644 index 
000000000000..ba9a8037ab26 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_parquet_to_assert_with_multipletable.conf @@ -0,0 +1,66 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + tables_configs = [ + { + schema = { + table = "fake01" + } + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/parquet" + file_format_type = "parquet" + }, + { + schema = { + table = "fake02" + } + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/parquet" + file_format_type = "parquet" + } + ] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + 
table-names = ["fake01", "fake02"] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_to_console.conf new file mode 100644 index 000000000000..330ce7479ce8 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/parquet/s3_file_to_console.conf @@ -0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/tmp/fake_empty" + file_format_type = "parquet" + } +} + +sink { + Console {} +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e.txt b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e.txt new file mode 100644 index 000000000000..9871cd85eb66 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e.txt @@ -0,0 +1,5 @@ +uDDrwsQQYONTNeUBIOnLAgunvDqLBObroRzdEdvDgRmgaeFyFH5456857591576298739157764687713794636442057612252MTDnafalse3313846190943192276641872220071936002.4798444E389.52375328387482E307vcIGF2023-06-0776258155390368615610.7646252373186602912023-05-08 16:08:51ipToEdierOAbwQfQzObWqiRhjkWYaMKdCbjurhstsWrAVlRyyR2905930362869031292782506910815576701385108050hArFutrue12631169122166306155952414159791708165.949173E372.1775762383875058E307kMlgO2023-05-2027214280267865241887.6424416000104182532023-10-20 03:49:02 +QIpzzZNFkLwARZDSdwdBzkegCdIRVYJnuXgxNXytAJxxaTzmDF16603816781145850255103997497062535321459349811xaTOktrue5327578191749099325840234439082792961.955231E381.5072154481920294E308GDWOu2023-05-0581449039533149712064.4515003874168475032023-07-06 22:34:11sfgxhqvOLzjdTSNcNaWfEnZqvQraSSuMPazCGhPmSrGuxggqGh111449466287130860562118177510004750271267350957FDhTstrue96247293946402921952995131535667203.3240283E384.473485404447698E307YFdwf2023-02-0429456519357128996647.9939318900994572132023-01-12 02:29:58 
+xVJPgVlosBlTYSkmJCqKHMXzbZkNQKInuVMZeYGhsmzUmcLyPx137745493211075991209783701051546835517166168384qcYaifalse8318050110096656524405690917018449922.9617934E371.8901064340036343E307jaKMq2023-05-1275317114043170470995.9654034735914367862023-05-18 08:09:22raGGBnHsNwMZKemkFErUbedNjSllNcKOVUGdTpXcHGSVphHsNE86377304018502081846122308810391870441519757437JCRZStrue1829974183977114228752256792969205767.9090967E371.6286963710372255E308NBHUB2023-05-0732934086493941743464.6503746053883129532023-05-06 04:35:55 +dBgFeTKkCfnxCljyGfNEurEzCVgwpsHgmcOfYXiQHxeeQNjQuq1961913761867016982512369059615238191571813320BTfhbfalse652666522281866957533025299230722.1456136E381.2398422714159417E308YOiwg2023-10-2433001899362876139955.7235198795513055732023-06-23 13:46:46jsvmHLHlXCGFKwuqlTwAjdMckElrmqgBWvOuuKuWxcinFZWSky19959088245502706421265289671411088181469730839vUyULtrue952655754382886132164227350822215681.9033253E381.0966562906060974E308XFeKf2023-09-1731084757529957096723.2394423349193989032023-06-15 17:04:50 +obtYzIHOTKsABVtirEKEMYUYobsYlDJcFbpQUYvGxCcKlnswEG8096984004544201585383739017658796661353001394xchcntrue853141253976762312923177914159380482.8480754E381.055208146200822E308MSkTD2023-11-2420361788179232141281.9718823433892185262023-10-25 11:47:50gdCWZMGESyarjQPopBhDwKnOyDvaUDgQOEDRCmfUAagfnDDPqV8473436731118772451890654127233667151574025969ewJzLtrue6321769209768782446484076920790579202.7134378E381.1883616449174808E308STvOu2023-10-0821793351767634029460.2897683013563753232023-08-12 23:57:38 \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e.txt.lzo b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e.txt.lzo new file mode 100644 index 0000000000000000000000000000000000000000..3bd2782c6676b911dd0e45cdbaf2baaaa7b6998e GIT binary patch literal 2720 zcmXYzyKiGz9mnrI=dwW9m6mQbQxJ)kSm*t)&}w2|KN81w9NS6kYW;TNM`C;Y?2srX 
z5*-p1#THXgOhHKj1ro&+E5skLe*!|3NKo*--pxaDuAO_&`940s^K#X$2A`e0I{Ezb z)9W9<34ahAzkc<8@YkRG?)dT-Q~FI1ocsm9^Wb0WI9_h)s#a@NNu^O4G->9dI=>?4 zqxF14*VYK{tUSK7Mj2-<;nI3# zq*00phEH%Iv2N*QwLlnJ?KUP%yKUcW1&bX>zz_6 zCtc`MT1cyXd1zpY1+I z>*aX$maguW=eN=HX1~87!ZYn9lhg=j@L(>e7uE^QjHjF>>#@c%icM?$$uuM5^JRWb z-Y(Yz!i6D{a|02D;KoR1IMLc`p}6AOThA@-VTew7h1W7CEfFbhZ6d!+PDb27~t&~HS;4cEhTv&+js_XB=0kd`JxWXN!*HUujEf>tF zl+s)?tk^;ory!+7IF=T9Iu#-&_v{7RJ{zT3;5{yJ$386vhxCd8#r-P-i*TWtc`#ICqJIhzDcIQ-!4nf|Z(i zZAm^JH&%a+gI{_>gtEdDiCaJfPXtFYyw_ZE=xB_E(=sIm-gJnDBiNFa%4x;W9Bce{ z$!O*K+u{2!gI7$PF*MVo#Tq@+kimJ6_SqD)_Na(wP+e&#`j0qRF@DCyDHCbo)7qWv zZZEG#^mh8R%8+TJRiDt=Wp+^_#q#~3OWTu5twb*7>)SGI4_es%YIbPOXzyx&y+i4( zRa*JPe9k}+4-1I5PFkc9A<-a+)S3e##=s6hB3$%ZoriB;f;SG-NV*f^e;F2x^@qqZ z)2Lih5e`y#FF*}%xJ9(k6GQ{s;qX~1m~5niM8L#jr&L~qzyB(D%`~eFeZM@!Y)W+Cb~p6`3KSg7AVM`L_GN1TBkk|}^8X$d;+s7i2&kT?WGdH@k7QVyfg z35DKYW^dZ{@44aNB05F5lint)7es&sz^yrU1XTbb2z+JuHa6h~PaPSBC*%I0t=36Z zNF`HjX8x87zCcebkpk)=sBp?Fv``{?DT8KMh44z}Ksf)OmtX<$P!p!oQm3^(>1Rgy z0d15f*&&(lveQSpFATBK^)h$J(%aGYc}0q&S+7cmt^0=>DeeY?8m&Cm)(h0#pz#vp z0=mFV;Pg@n5pH6S@q$B%;s&5=^DFw+>0yRhVS5(I-lgqR)>(HI|MK1m7kKq1I! 
zlT?5=hD-C|CHRq$7%M{9BVVz@BQ9FihwbR=FN4AhH5Q%0;2!>SwUs|WEknqA; zyhfFQ^@N^Fj29hoCa@0R8k47CiNS8YBbE;2U8&rr|z6C=EZx zC;kjglz1QjWqjm zo9g;Q3>g?sO35v~MEVm^K?e*F!%|2ko*?0Y22wE9{~4dSWL^lF);2BOPro~e{vXl< BZsh;~ literal 0 HcmV?d00001 diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e_delimiter.txt b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e_delimiter.txt new file mode 100644 index 000000000000..b87687448ca6 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e_delimiter.txt @@ -0,0 +1,5 @@ +qwerqwer|1972607327106509113020400507301104442513849629249|qwer|true|108|22432|11383204|723560014108175360|3.1407707E38|1.262116635132156E308|zlmzw|2023-05-25|97236477433882034782.803540569732795689|2023-03-25 04:30:13|qwerqwer1458583961104266156763552401211382922561937221393qwertrue930925142792030530244095935039344647.838737E373.3238256808030654E307Zicjq2023-10-1918739344608215707574.2737367351403166822023-10-07 08:24:27 +qwerqwer|20734545375230101131603368534223532992574063143|qwer|true|99|21567|768189694|8504422836686883840|1.3761162E38|5.460153079423635E307|dkCwG|2023-05-19|83044404421834652395.960138696348105704|2023-03-24 10:48:12|qwerqwer2774295104069855819185865051778415509162817756qwerfalse1619571127265647324402356645454202881.8446726E381.7000909191489263E308cXxQV2023-07-2713431695514477025331.5815661990272672962023-12-22 12:26:16 +qwerqwer|11147903451235598576860383707165213199232994316|qwer|true|49|21122|1110303282|2083282743100007424|1.9729736E38|1.0399541425415623E308|muvcN|2023-08-13|68941603382218317993.487441177291093700|2023-04-06 02:40:57|qwerqwer69745783829424948385550024313502468211004949206qwertrue117227855844811138143962162044856324.844609E374.992962483991954E307pPYZS2023-05-1751345924758748590630.6631664051742477762023-12-10 19:23:26 
+qwerqwer|12600145717385486047323762331460409881387559257|qwer|true|54|30782|475296705|6520650210788816896|3.253564E38|1.181636072812166E308|RxBAU|2023-03-14|94882795877228509625.376060071805770292|2023-02-25 15:29:26|qwerqwer17078206571395918506189177703116985975671620089209qwerfalse11415353139002758476082670167752366081.4806856E385.82327433457546E307ppTVu2023-10-2784302780955330822761.6237458260160280852023-08-23 09:26:16 +qwerqwer|10811140972103212018816962034437650301336224152|qwer|true|82|27637|1110251085|806786601324796928|7.711023E37|4.398648945575819E307|kGVbL|2023-04-26|80164231813502964946.202647535547152674|2023-04-15 05:22:59|qwerqwer800727634149093075168463891515323059061714847070qwertrue351280654957024134756885372412119043.0538885E384.631561190310559E306leTTG2023-11-1490016690865756655359.8578360402194859042023-08-23 10:30:18 \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e_time_format.txt b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e_time_format.txt new file mode 100644 index 000000000000..b53cbf2e9954 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/e2e_time_format.txt @@ -0,0 +1,5 @@ +PgxkWerPquZxADbwRoyZXWZYjOZGvPkcRgcvBHHlSezTHszfCM9312330451016879764123434177920993828271669125781ofduatrue02080228995824162301554221532246172.5053808E386.563348638622289E306KjJjD2023-03-2048955630047163560901.2848899427908584092023-01-11T06:25:29OxqxAMLLAWTMzSvpVKDBXwVuLuVMdhJAbNYRqEmrsQBARdHLAo98774360215016679841391554731369111688804353367rsgcotrue1212280190712202440694969264535828985.9302515E371.2125301856008725E308tVuZI2023-08-0322004483923120397310.0486453397455656992023-06-10T17:15:02 
+zxMhGtbuHzxGFwmfFHIUAFvvTgUvQqetaDxOzAavJELHDSdPEV101244983376266331014538704017395315171492457270otcMntrue1024860139917168158893375714893248001.4333913E381.4334353544948444E308VdcYj2023-05-1991883965802194963022.6890574501331289452023-04-26T00:46:03PRIEJkcMnYJRsURrfhCbSgtGebklCfMXxzhZOZMudVetgtUCXc7736457412116475204164682112718260472701764785855oCRKRtrue851925389193674632146772472708622432.5017376E371.4791889801142986E308KIZKN2023-09-1334541234299674175851.0304104953008357352023-08-21T23:52:24 +EIYLFVjmjZXKcbLQtzXKMzIqLccyubcQygIssDqfcwotNQDdfH1836526392121945431313063532901700703821233811949qIlEotrue92987344051191848244308123217417652.6358307E379.12573038650651E307wrQCE2023-06-1169873404793136392100.0758355471497874132023-02-25T07:13:57IRAHziGvkRHEaUmcameBKDUCNFEjmKaafwSGblGdJGGyzQivvd12711189912021715577886030065553480147504046565RpOswfalse122122444030768933777305146193430843.3350248E381.2526133143299848E308kzyBq2023-07-1557715748983349653587.0631369056378550372023-04-28T16:02:28 +tfaoRtCwuXCoiKkBcvPOoixYBZnaUlPQMFaRjxhigVLzmBrskw190529529814451211117678789994558371211783348ccGkzfalse1112829829981778213199660821898045981.2857434E383.343575138440927E307SsSaC2023-10-2658282015679301802224.6155516408553745142023-01-26T13:15:35IETWTtUXEMkdNCiBvZPKghKHXjQUvSMaMsKYCmzsLRjFhEQXyv76798692084328842150475226014007534741586287890wbzKKtrue6625604192054124846725009551245517061.307359E381.6429413197552776E308QdOjL2023-02-2257671928068543569766.1712121225441028432023-03-28T03:01:44 +hdTngggfdRvAAMngAsZUYTEQuTFQEHdIzjOIEGIoYrTYZLIvey760974310142710026829414079475220181644270624MLIllfalse3622155133605466673524332669773532608.235333E379.308989713025347E307nrzoy2023-05-2418552644397825116718.5869443937920164442023-01-08T10:11:24bzXNzJVCPXkxeiQSpYXaVJoHWTJnKJbeIiuknfLOtQAGrKUoFr5144561031691489776106356671519647880411104465196MwxgFtrue501558654910648188780747761689955443.637149E371.4784398529023391E308cZRyO2023-06-0419268168651664178359.9430267663053671912023-11-28T19:35:41 \ No 
newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/fake_to_s3_file_text.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/fake_to_s3_file_text.conf new file mode 100644 index 000000000000..145081911a2a --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/fake_to_s3_file_text.conf @@ -0,0 +1,87 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + FakeSource { + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + S3File { + "schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST" + "data_save_mode"="APPEND_DATA" + "access_key"="XXXXXX" + bucket="s3a://ws-package" + "secret_key"="AWS_XXXX" + "fs.s3a.endpoint"="s3.cn-north-1.amazonaws.com.cn" + "fs.s3a.aws.credentials.provider"="org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + path = "/tmp/seatunnel/text" + row_delimiter = "\n" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "text" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + compress_codec = "lzo" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/fake_to_s3_file_with_multiple_table.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/fake_to_s3_file_with_multiple_table.conf new file mode 100644 index 000000000000..70e90d7e50a4 --- /dev/null +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/fake_to_s3_file_with_multiple_table.conf @@ -0,0 +1,128 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + FakeSource { + tables_configs = [ + { + schema = { + table = "fake1" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + }, + { + schema = { + table = "fake2" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + 
c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } + ] + } +} + +sink { + S3File { + "schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST" + "data_save_mode"="APPEND_DATA" + "access_key"="XXXXXX" + bucket="s3a://ws-package" + "secret_key"="AWS_XXXX" + "fs.s3a.endpoint"="s3.cn-north-1.amazonaws.com.cn" + "fs.s3a.aws.credentials.provider"="org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + path = "/tmp/fake_empty/text/${table_name}" + row_delimiter = "\n" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "text" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + compress_codec = "lzo" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_delimiter_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_delimiter_assert.conf new file mode 100644 index 000000000000..af82ff5b644f --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_delimiter_assert.conf @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/text_delimiter" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + file_format_type = "text" + read_columns = [c_string, c_boolean] + delimiter = "\\|" + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + 
field_value = [ + { + equals_to = "qwer" + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + equals_to = true + } + ] + } + ] + } + } +} + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_lzo_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_lzo_to_assert.conf new file mode 100644 index 000000000000..44b1a6908615 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_lzo_to_assert.conf @@ -0,0 +1,143 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/lzo_text" + partition_dir_expression = "${k0}=${v0}" + is_partition_field_write_in_file = true + file_name_expression = "${transactionId}_${now}" + file_format_type = "text" + filename_time_format = "yyyy.MM.dd" + is_enable_transaction = true + compress_codec = "lzo" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + +transform { + sql { + source_table_name = "fake" + result_table_name = "sqlresult" + query = "select * from fake where c_string = 'MTDna'" + } +} + +sink { + Assert { + source_table_name = "sqlresult" + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 1 + }, + { + rule_type = MIN_ROW + rule_value = 1 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + equals_to = "MTDna" + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = 
c_smallint + field_type = short + field_value = [ + { + equals_to = 13846 + } + ] + }, + { + field_name = c_date + field_type = date + field_value = [ + { + equals_to = "2023-06-07" + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_projection_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_projection_to_assert.conf new file mode 100644 index 000000000000..a7de9c1c5f83 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_projection_to_assert.conf @@ -0,0 +1,134 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/text" + file_format_type = "text" + read_columns = [c_string, c_boolean, c_double] + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_skip_headers.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_skip_headers.conf new file mode 100644 index 000000000000..92de5de9838b --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_skip_headers.conf @@ -0,0 +1,134 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/text" + file_format_type = "text" + skip_header_row_number = 1 + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 4 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_to_assert.conf new file mode 100644 index 000000000000..624f71f199fb --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_to_assert.conf @@ -0,0 +1,133 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/text" + file_format_type = "text" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_boolean + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = name + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = hobby + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_to_assert_with_multipletable.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_to_assert_with_multipletable.conf new file mode 100644 index 000000000000..41c2533423c5 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_text_to_assert_with_multipletable.conf @@ -0,0 +1,130 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + tables_configs = [ + { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/text" + file_format_type = "text" + schema = { + table = "fake01" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + }, + { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/text" + file_format_type = "text" + schema = { + table = "fake02" + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = 
float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + } + ] + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + table-names = ["fake01", "fake02"] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_time_format_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_time_format_assert.conf new file mode 100644 index 000000000000..cc8c814e95d0 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/text/s3_file_time_format_assert.conf @@ -0,0 +1,100 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + execution.parallelism = 1 + spark.app.name = "SeaTunnel" + spark.executor.instances = 2 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + job.mode = "BATCH" +} + +source { + S3File { + fs.s3a.endpoint="s3.cn-north-1.amazonaws.com.cn" + fs.s3a.aws.credentials.provider = "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" + access_key = "XXXXXX" + secret_key = "AWS_XXXX" + bucket = "s3a://ws-package" + path = "/test/seatunnel/read/text_time_format" + file_format_type = "text" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + read_columns = [c_timestamp] + datetime_format = "yyyy-MM-dd'T'HH:mm:ss" + result_table_name = "fake" + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = c_timestamp + field_type = timestamp + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml index 477b0620d285..455f9e76594c 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml @@ -40,6 +40,7 @@ connector-file-cos-e2e connector-file-sftp-e2e connector-file-oss-e2e + connector-file-s3-e2e connector-cassandra-e2e connector-neo4j-e2e connector-http-e2e diff --git 
a/seatunnel-shade/seatunnel-hadoop3-3.1.4-uber/pom.xml b/seatunnel-shade/seatunnel-hadoop3-3.1.4-uber/pom.xml index be5ced9214aa..322c1ed65b55 100644 --- a/seatunnel-shade/seatunnel-hadoop3-3.1.4-uber/pom.xml +++ b/seatunnel-shade/seatunnel-hadoop3-3.1.4-uber/pom.xml @@ -97,6 +97,7 @@ com.google.common.base.* com.google.common.cache.* com.google.common.collect.* + com.google.common.util.*