From a8f416f92b2c13101c8b04d43ea7224f0dcabe3d Mon Sep 17 00:00:00 2001 From: ajantha-bhat Date: Tue, 18 Jul 2023 18:54:32 +0530 Subject: [PATCH] Core: Move `iceberg-parquet` files to `iceberg-core` - Since the core module needs to write stats in Parquet format, to avoid a circular dependency, move all the files from the iceberg-parquet module to iceberg-core. - `TestParquetReadProjection` used to duplicate the test code of iceberg-api module's `TestReadProjection`. Remove the duplicate class and instead directly extend the original class from iceberg-api module. - Update TestParquetReadProjection to skip empty struct testcases as only Avro readers support them. The testcases are now common for both Avro and Parquet readers. --- README.md | 1 - build.gradle | 45 +++++-------------- .../data/parquet/BaseParquetReaders.java | 0 .../data/parquet/BaseParquetWriter.java | 0 .../data/parquet/GenericParquetReaders.java | 0 .../data/parquet/GenericParquetWriter.java | 0 .../iceberg/data/parquet/InternalReader.java | 0 .../iceberg/data/parquet/InternalWriter.java | 0 .../iceberg/parquet/ApplyNameMapping.java | 0 .../iceberg/parquet/BaseColumnIterator.java | 0 .../iceberg/parquet/BasePageIterator.java | 0 .../iceberg/parquet/ColumnIterator.java | 0 .../apache/iceberg/parquet/ColumnWriter.java | 0 .../iceberg/parquet/MessageTypeToType.java | 0 .../apache/iceberg/parquet/PageIterator.java | 0 .../org/apache/iceberg/parquet/Parquet.java | 0 .../apache/iceberg/parquet/ParquetAvro.java | 0 .../iceberg/parquet/ParquetAvroReader.java | 0 .../parquet/ParquetAvroValueReaders.java | 0 .../iceberg/parquet/ParquetAvroWriter.java | 0 .../parquet/ParquetBloomRowGroupFilter.java | 0 .../iceberg/parquet/ParquetCodecFactory.java | 0 .../iceberg/parquet/ParquetConversions.java | 0 .../ParquetDictionaryRowGroupFilter.java | 0 .../iceberg/parquet/ParquetFilters.java | 0 .../org/apache/iceberg/parquet/ParquetIO.java | 0 .../iceberg/parquet/ParquetIterable.java | 0 
.../parquet/ParquetMetricsRowGroupFilter.java | 0 .../iceberg/parquet/ParquetReadSupport.java | 0 .../apache/iceberg/parquet/ParquetReader.java | 0 .../iceberg/parquet/ParquetSchemaUtil.java | 0 .../iceberg/parquet/ParquetTypeVisitor.java | 0 .../apache/iceberg/parquet/ParquetUtil.java | 0 .../iceberg/parquet/ParquetValueReader.java | 0 .../iceberg/parquet/ParquetValueReaders.java | 0 .../iceberg/parquet/ParquetValueWriter.java | 0 .../iceberg/parquet/ParquetValueWriters.java | 0 .../iceberg/parquet/ParquetWriteAdapter.java | 0 .../iceberg/parquet/ParquetWriteSupport.java | 0 .../apache/iceberg/parquet/ParquetWriter.java | 0 .../apache/iceberg/parquet/PruneColumns.java | 0 .../org/apache/iceberg/parquet/ReadConf.java | 0 .../org/apache/iceberg/parquet/RemoveIds.java | 0 .../iceberg/parquet/TripleIterator.java | 0 .../apache/iceberg/parquet/TripleWriter.java | 0 .../iceberg/parquet/TypeToMessageType.java | 0 .../parquet/TypeWithSchemaVisitor.java | 0 .../iceberg/parquet/ValuesAsBytesReader.java | 0 .../parquet/VectorizedParquetReader.java | 0 .../iceberg/parquet/VectorizedReader.java | 0 .../avro/TestParquetReadProjection.java | 0 .../parquet/ParquetWritingTestUtils.java | 0 .../parquet/TestBloomRowGroupFilter.java | 0 .../parquet/TestCDHParquetStatistics.java | 0 .../parquet/TestDictionaryRowGroupFilter.java | 0 .../iceberg/parquet/TestInternalParquet.java | 0 .../apache/iceberg/parquet/TestParquet.java | 0 .../parquet/TestParquetDataWriter.java | 0 .../parquet/TestParquetDeleteWriters.java | 0 .../parquet/TestParquetEncryption.java | 0 .../parquet/TestParquetSchemaUtil.java | 0 .../iceberg/parquet/TestPruneColumns.java | 0 docs/docs/api.md | 1 - flink/v1.18/build.gradle | 1 - flink/v1.19/build.gradle | 1 - flink/v1.20/build.gradle | 1 - kafka-connect/build.gradle | 2 - mr/build.gradle | 1 - settings.gradle | 2 - site/docs/contribute.md | 2 - site/docs/releases.md | 2 - spark/v3.3/build.gradle | 2 - spark/v3.4/build.gradle | 2 - spark/v3.5/build.gradle | 2 - 74 
files changed, 12 insertions(+), 53 deletions(-) rename {parquet => core}/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/data/parquet/InternalReader.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/data/parquet/InternalWriter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ApplyNameMapping.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/BaseColumnIterator.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/BasePageIterator.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ColumnIterator.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ColumnWriter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/PageIterator.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/Parquet.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetAvro.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetAvroReader.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetCodecFactory.java (100%) rename 
{parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetConversions.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetDictionaryRowGroupFilter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetFilters.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetIO.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetIterable.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetReadSupport.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetReader.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetValueReader.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetValueWriter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetValueWriters.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetWriteAdapter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetWriteSupport.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ParquetWriter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/PruneColumns.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ReadConf.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/RemoveIds.java (100%) 
rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/TripleIterator.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/TripleWriter.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/VectorizedParquetReader.java (100%) rename {parquet => core}/src/main/java/org/apache/iceberg/parquet/VectorizedReader.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/avro/TestParquetReadProjection.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestCDHParquetStatistics.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestInternalParquet.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestParquet.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestParquetDataWriter.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestParquetDeleteWriters.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestParquetEncryption.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java (100%) rename {parquet => core}/src/test/java/org/apache/iceberg/parquet/TestPruneColumns.java (100%) diff --git a/README.md b/README.md index 1fe77ceffcf9..a827276242b0 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,6 
@@ Iceberg table support is organized in library modules: * `iceberg-common` contains utility classes used in other modules * `iceberg-api` contains the public Iceberg API * `iceberg-core` contains implementations of the Iceberg API and support for Avro data files, **this is what processing engines should depend on** -* `iceberg-parquet` is an optional module for working with tables backed by Parquet files * `iceberg-arrow` is an optional module for reading Parquet into Arrow memory * `iceberg-orc` is an optional module for working with tables backed by ORC files * `iceberg-hive-metastore` is an implementation of Iceberg tables backed by the Hive metastore Thrift client diff --git a/build.gradle b/build.gradle index 65b42358402a..a8d25e56afc7 100644 --- a/build.gradle +++ b/build.gradle @@ -111,7 +111,7 @@ if (file("${rootDir}/iceberg-build.properties").exists()) { } def projectVersion = getProjectVersion() -final REVAPI_PROJECTS = ["iceberg-api", "iceberg-core", "iceberg-parquet", "iceberg-orc", "iceberg-common", "iceberg-data"] +final REVAPI_PROJECTS = ["iceberg-api", "iceberg-core", "iceberg-orc", "iceberg-common", "iceberg-data"] allprojects { group = "org.apache.iceberg" @@ -157,7 +157,7 @@ subprojects { rootTask.finalizedBy showDeprecationRulesOnRevApiFailure } } - + tasks.named("revapiAnalyze").configure { dependsOn(":iceberg-common:jar") } @@ -353,6 +353,13 @@ project(':iceberg-core') { exclude group: 'org.slf4j', module: 'slf4j-log4j12' } + implementation(libs.parquet.avro) { + exclude group: 'org.apache.avro', module: 'avro' + // already shaded by Parquet + exclude group: 'it.unimi.dsi' + exclude group: 'org.codehaus.jackson' + } + testImplementation libs.jetty.servlet testImplementation libs.jakarta.servlet testImplementation libs.jetty.server @@ -371,7 +378,6 @@ project(':iceberg-data') { implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow') api project(':iceberg-api') implementation project(':iceberg-core') - compileOnly 
project(':iceberg-parquet') compileOnly project(':iceberg-orc') compileOnly(libs.hadoop2.common) { exclude group: 'commons-beanutils' @@ -564,7 +570,6 @@ project(':iceberg-delta-lake') { api project(':iceberg-api') implementation project(':iceberg-common') implementation project(':iceberg-core') - implementation project(':iceberg-parquet') implementation platform(libs.jackson.bom) implementation libs.jackson.databind annotationProcessor libs.immutables.value @@ -770,33 +775,6 @@ project(':iceberg-orc') { } } -project(':iceberg-parquet') { - test { - useJUnitPlatform() - } - dependencies { - implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow') - api project(':iceberg-api') - implementation project(':iceberg-core') - implementation project(':iceberg-common') - - implementation(libs.parquet.avro) { - exclude group: 'org.apache.avro', module: 'avro' - // already shaded by Parquet - exclude group: 'it.unimi.dsi' - exclude group: 'org.codehaus.jackson' - } - - compileOnly libs.avro.avro - compileOnly(libs.hadoop2.client) { - exclude group: 'org.apache.avro', module: 'avro' - } - - testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts') - testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts') - } -} - project(':iceberg-arrow') { test { useJUnitPlatform() @@ -805,7 +783,6 @@ project(':iceberg-arrow') { implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow') api project(':iceberg-api') implementation project(':iceberg-core') - implementation project(':iceberg-parquet') implementation(libs.arrow.vector) { exclude group: 'io.netty', module: 'netty-buffer' @@ -897,7 +874,9 @@ project(':iceberg-snowflake') { } dependencies { - implementation project(':iceberg-core') + implementation(project(':iceberg-core')) { + exclude group: 'com.github.luben', module: 'zstd-jni' + } implementation project(':iceberg-common') implementation project(path: ':iceberg-bundled-guava', 
configuration: 'shadow') implementation platform(libs.jackson.bom) diff --git a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java b/core/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java rename to core/src/main/java/org/apache/iceberg/data/parquet/BaseParquetReaders.java diff --git a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java b/core/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java rename to core/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java diff --git a/parquet/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java b/core/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java rename to core/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java diff --git a/parquet/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java b/core/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java rename to core/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java diff --git a/parquet/src/main/java/org/apache/iceberg/data/parquet/InternalReader.java b/core/src/main/java/org/apache/iceberg/data/parquet/InternalReader.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/data/parquet/InternalReader.java rename to core/src/main/java/org/apache/iceberg/data/parquet/InternalReader.java diff --git a/parquet/src/main/java/org/apache/iceberg/data/parquet/InternalWriter.java 
b/core/src/main/java/org/apache/iceberg/data/parquet/InternalWriter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/data/parquet/InternalWriter.java rename to core/src/main/java/org/apache/iceberg/data/parquet/InternalWriter.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ApplyNameMapping.java b/core/src/main/java/org/apache/iceberg/parquet/ApplyNameMapping.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ApplyNameMapping.java rename to core/src/main/java/org/apache/iceberg/parquet/ApplyNameMapping.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/BaseColumnIterator.java b/core/src/main/java/org/apache/iceberg/parquet/BaseColumnIterator.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/BaseColumnIterator.java rename to core/src/main/java/org/apache/iceberg/parquet/BaseColumnIterator.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/BasePageIterator.java b/core/src/main/java/org/apache/iceberg/parquet/BasePageIterator.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/BasePageIterator.java rename to core/src/main/java/org/apache/iceberg/parquet/BasePageIterator.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ColumnIterator.java b/core/src/main/java/org/apache/iceberg/parquet/ColumnIterator.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ColumnIterator.java rename to core/src/main/java/org/apache/iceberg/parquet/ColumnIterator.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ColumnWriter.java b/core/src/main/java/org/apache/iceberg/parquet/ColumnWriter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ColumnWriter.java rename to core/src/main/java/org/apache/iceberg/parquet/ColumnWriter.java diff --git 
a/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java b/core/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java rename to core/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java b/core/src/main/java/org/apache/iceberg/parquet/PageIterator.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/PageIterator.java rename to core/src/main/java/org/apache/iceberg/parquet/PageIterator.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java b/core/src/main/java/org/apache/iceberg/parquet/Parquet.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java rename to core/src/main/java/org/apache/iceberg/parquet/Parquet.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvro.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetAvro.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvro.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetAvro.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroReader.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetAvroReader.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroReader.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetAvroReader.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java diff --git 
a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetBloomRowGroupFilter.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetCodecFactory.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetCodecFactory.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetCodecFactory.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetCodecFactory.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetConversions.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetConversions.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetConversions.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetConversions.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetDictionaryRowGroupFilter.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetDictionaryRowGroupFilter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetDictionaryRowGroupFilter.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetDictionaryRowGroupFilter.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetFilters.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetFilters.java similarity index 100% rename from 
parquet/src/main/java/org/apache/iceberg/parquet/ParquetFilters.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetFilters.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetIO.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetIO.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetIO.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetIO.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetIterable.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetIterable.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetIterable.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetIterable.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetMetricsRowGroupFilter.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetReadSupport.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetReadSupport.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetReadSupport.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetReadSupport.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetReader.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetReader.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetReader.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetReader.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java 
similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReader.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetValueReader.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReader.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetValueReader.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetValueReaders.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueWriter.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetValueWriter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueWriter.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetValueWriter.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueWriters.java 
b/core/src/main/java/org/apache/iceberg/parquet/ParquetValueWriters.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueWriters.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetValueWriters.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetWriteAdapter.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetWriteAdapter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetWriteAdapter.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetWriteAdapter.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetWriteSupport.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetWriteSupport.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetWriteSupport.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetWriteSupport.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetWriter.java b/core/src/main/java/org/apache/iceberg/parquet/ParquetWriter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ParquetWriter.java rename to core/src/main/java/org/apache/iceberg/parquet/ParquetWriter.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/PruneColumns.java b/core/src/main/java/org/apache/iceberg/parquet/PruneColumns.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/PruneColumns.java rename to core/src/main/java/org/apache/iceberg/parquet/PruneColumns.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ReadConf.java b/core/src/main/java/org/apache/iceberg/parquet/ReadConf.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ReadConf.java rename to core/src/main/java/org/apache/iceberg/parquet/ReadConf.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/RemoveIds.java 
b/core/src/main/java/org/apache/iceberg/parquet/RemoveIds.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/RemoveIds.java rename to core/src/main/java/org/apache/iceberg/parquet/RemoveIds.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TripleIterator.java b/core/src/main/java/org/apache/iceberg/parquet/TripleIterator.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/TripleIterator.java rename to core/src/main/java/org/apache/iceberg/parquet/TripleIterator.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TripleWriter.java b/core/src/main/java/org/apache/iceberg/parquet/TripleWriter.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/TripleWriter.java rename to core/src/main/java/org/apache/iceberg/parquet/TripleWriter.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java b/core/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java rename to core/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java b/core/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java rename to core/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java b/core/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java rename to core/src/main/java/org/apache/iceberg/parquet/ValuesAsBytesReader.java diff --git 
a/parquet/src/main/java/org/apache/iceberg/parquet/VectorizedParquetReader.java b/core/src/main/java/org/apache/iceberg/parquet/VectorizedParquetReader.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/VectorizedParquetReader.java rename to core/src/main/java/org/apache/iceberg/parquet/VectorizedParquetReader.java diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/VectorizedReader.java b/core/src/main/java/org/apache/iceberg/parquet/VectorizedReader.java similarity index 100% rename from parquet/src/main/java/org/apache/iceberg/parquet/VectorizedReader.java rename to core/src/main/java/org/apache/iceberg/parquet/VectorizedReader.java diff --git a/parquet/src/test/java/org/apache/iceberg/avro/TestParquetReadProjection.java b/core/src/test/java/org/apache/iceberg/avro/TestParquetReadProjection.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/avro/TestParquetReadProjection.java rename to core/src/test/java/org/apache/iceberg/avro/TestParquetReadProjection.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java b/core/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java rename to core/src/test/java/org/apache/iceberg/parquet/ParquetWritingTestUtils.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java b/core/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java rename to core/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestCDHParquetStatistics.java b/core/src/test/java/org/apache/iceberg/parquet/TestCDHParquetStatistics.java similarity index 100% rename from 
parquet/src/test/java/org/apache/iceberg/parquet/TestCDHParquetStatistics.java rename to core/src/test/java/org/apache/iceberg/parquet/TestCDHParquetStatistics.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java b/core/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java rename to core/src/test/java/org/apache/iceberg/parquet/TestDictionaryRowGroupFilter.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestInternalParquet.java b/core/src/test/java/org/apache/iceberg/parquet/TestInternalParquet.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestInternalParquet.java rename to core/src/test/java/org/apache/iceberg/parquet/TestInternalParquet.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java b/core/src/test/java/org/apache/iceberg/parquet/TestParquet.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java rename to core/src/test/java/org/apache/iceberg/parquet/TestParquet.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetDataWriter.java b/core/src/test/java/org/apache/iceberg/parquet/TestParquetDataWriter.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestParquetDataWriter.java rename to core/src/test/java/org/apache/iceberg/parquet/TestParquetDataWriter.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetDeleteWriters.java b/core/src/test/java/org/apache/iceberg/parquet/TestParquetDeleteWriters.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestParquetDeleteWriters.java rename to core/src/test/java/org/apache/iceberg/parquet/TestParquetDeleteWriters.java diff --git 
a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetEncryption.java b/core/src/test/java/org/apache/iceberg/parquet/TestParquetEncryption.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestParquetEncryption.java rename to core/src/test/java/org/apache/iceberg/parquet/TestParquetEncryption.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java b/core/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java rename to core/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestPruneColumns.java b/core/src/test/java/org/apache/iceberg/parquet/TestPruneColumns.java similarity index 100% rename from parquet/src/test/java/org/apache/iceberg/parquet/TestPruneColumns.java rename to core/src/test/java/org/apache/iceberg/parquet/TestPruneColumns.java diff --git a/docs/docs/api.md b/docs/docs/api.md index e4ea1b1043b4..aeefd902be07 100644 --- a/docs/docs/api.md +++ b/docs/docs/api.md @@ -239,7 +239,6 @@ Iceberg table support is organized in library modules: * `iceberg-arrow` is an implementation of the Iceberg type system for reading and writing data stored in Iceberg tables using Apache Arrow as the in-memory data format * `iceberg-aws` contains implementations of the Iceberg API to be used with tables stored on AWS S3 and/or for tables defined using the AWS Glue data catalog * `iceberg-core` contains implementations of the Iceberg API and support for Avro data files, **this is what processing engines should depend on** -* `iceberg-parquet` is an optional module for working with tables backed by Parquet files * `iceberg-orc` is an optional module for working with tables backed by ORC files (*experimental*) * `iceberg-hive-metastore` is an implementation of Iceberg tables backed by the Hive 
metastore Thrift client diff --git a/flink/v1.18/build.gradle b/flink/v1.18/build.gradle index 83dc07523a3c..7d62ef53052b 100644 --- a/flink/v1.18/build.gradle +++ b/flink/v1.18/build.gradle @@ -29,7 +29,6 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { implementation project(':iceberg-core') api project(':iceberg-data') implementation project(':iceberg-orc') - implementation project(':iceberg-parquet') implementation project(':iceberg-hive-metastore') compileOnly libs.flink118.avro diff --git a/flink/v1.19/build.gradle b/flink/v1.19/build.gradle index 50bcadb618e4..b17a45464c73 100644 --- a/flink/v1.19/build.gradle +++ b/flink/v1.19/build.gradle @@ -29,7 +29,6 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { implementation project(':iceberg-core') api project(':iceberg-data') implementation project(':iceberg-orc') - implementation project(':iceberg-parquet') implementation project(':iceberg-hive-metastore') compileOnly libs.flink119.avro diff --git a/flink/v1.20/build.gradle b/flink/v1.20/build.gradle index 4a1bae660bdb..90a54122f05a 100644 --- a/flink/v1.20/build.gradle +++ b/flink/v1.20/build.gradle @@ -29,7 +29,6 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { implementation project(':iceberg-core') api project(':iceberg-data') implementation project(':iceberg-orc') - implementation project(':iceberg-parquet') implementation project(':iceberg-hive-metastore') compileOnly libs.flink120.avro diff --git a/kafka-connect/build.gradle b/kafka-connect/build.gradle index 15bf013f28b2..659fe78257aa 100644 --- a/kafka-connect/build.gradle +++ b/kafka-connect/build.gradle @@ -49,7 +49,6 @@ project(':iceberg-kafka-connect:iceberg-kafka-connect') { compileOnly libs.kafka.connect.json testImplementation libs.hadoop3.client - testRuntimeOnly project(':iceberg-parquet') testRuntimeOnly project(':iceberg-orc') } @@ -108,7 +107,6 @@ project(':iceberg-kafka-connect:iceberg-kafka-connect-runtime') { exclude group: 
'org.eclipse.jetty' } implementation project(':iceberg-orc') - implementation project(':iceberg-parquet') implementation project(':iceberg-aws') implementation platform(libs.awssdk.bom) diff --git a/mr/build.gradle b/mr/build.gradle index 557b46c682a1..29da047dc23a 100644 --- a/mr/build.gradle +++ b/mr/build.gradle @@ -35,7 +35,6 @@ project(':iceberg-mr') { api project(':iceberg-data') implementation project(':iceberg-hive-metastore') implementation project(':iceberg-orc') - implementation project(':iceberg-parquet') compileOnly(libs.hadoop2.client) { exclude group: 'org.apache.avro', module: 'avro' diff --git a/settings.gradle b/settings.gradle index efd9336d2e7e..85899be0038c 100644 --- a/settings.gradle +++ b/settings.gradle @@ -30,7 +30,6 @@ include 'azure' include 'azure-bundle' include 'orc' include 'arrow' -include 'parquet' include 'bundled-guava' include 'spark' include 'hive-metastore' @@ -55,7 +54,6 @@ project(':azure').name = 'iceberg-azure' project(':azure-bundle').name = 'iceberg-azure-bundle' project(':orc').name = 'iceberg-orc' project(':arrow').name = 'iceberg-arrow' -project(':parquet').name = 'iceberg-parquet' project(':bundled-guava').name = 'iceberg-bundled-guava' project(':spark').name = 'iceberg-spark' project(':hive-metastore').name = 'iceberg-hive-metastore' diff --git a/site/docs/contribute.md b/site/docs/contribute.md index a12936a7bc49..4a0629c2e0fa 100644 --- a/site/docs/contribute.md +++ b/site/docs/contribute.md @@ -108,7 +108,6 @@ Iceberg table support is organized in library modules: * `iceberg-common` contains utility classes used in other modules * `iceberg-api` contains the public Iceberg API * `iceberg-core` contains implementations of the Iceberg API and support for Avro data files, **this is what processing engines should depend on** -* `iceberg-parquet` is an optional module for working with tables backed by Parquet files * `iceberg-arrow` is an optional module for reading Parquet into Arrow memory * `iceberg-orc` is an 
optional module for working with tables backed by ORC files * `iceberg-hive-metastore` is an implementation of Iceberg tables backed by the Hive metastore Thrift client @@ -155,7 +154,6 @@ __Modules__ `iceberg-core` `iceberg-data` `iceberg-orc` -`iceberg-parquet` Changes to public interfaces and classes in the subprojects listed above require a deprecation cycle of one minor release. diff --git a/site/docs/releases.md b/site/docs/releases.md index 27ad3b8bfc4c..a1db0521f24f 100644 --- a/site/docs/releases.md +++ b/site/docs/releases.md @@ -54,8 +54,6 @@ dependencies { } ``` -You may also want to include `iceberg-parquet` for Parquet file support. - ### Maven To add a dependency on Iceberg in Maven, add the following to your `pom.xml`: diff --git a/spark/v3.3/build.gradle b/spark/v3.3/build.gradle index c8d4b4549f4a..7d38ec43a6ab 100644 --- a/spark/v3.3/build.gradle +++ b/spark/v3.3/build.gradle @@ -56,7 +56,6 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") { implementation project(':iceberg-core') implementation project(':iceberg-data') implementation project(':iceberg-orc') - implementation project(':iceberg-parquet') implementation project(':iceberg-arrow') implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}") @@ -157,7 +156,6 @@ project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVer compileOnly libs.errorprone.annotations testImplementation project(path: ':iceberg-data') - testImplementation project(path: ':iceberg-parquet') testImplementation project(path: ':iceberg-hive-metastore') testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts') testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts') diff --git a/spark/v3.4/build.gradle b/spark/v3.4/build.gradle index fe81b974cedd..6c5c2655630a 100644 --- a/spark/v3.4/build.gradle +++ b/spark/v3.4/build.gradle @@ -56,7 +56,6 @@ 
project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") { implementation project(':iceberg-core') implementation project(':iceberg-data') implementation project(':iceberg-orc') - implementation project(':iceberg-parquet') implementation project(':iceberg-arrow') implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}") implementation("org.apache.datasketches:datasketches-java:${libs.versions.datasketches.get()}") @@ -164,7 +163,6 @@ project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVer compileOnly libs.errorprone.annotations testImplementation project(path: ':iceberg-data') - testImplementation project(path: ':iceberg-parquet') testImplementation project(path: ':iceberg-hive-metastore') testImplementation project(path: ':iceberg-data', configuration: 'testArtifacts') testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts') diff --git a/spark/v3.5/build.gradle b/spark/v3.5/build.gradle index b5a182f3678e..ddf5e06f0a87 100644 --- a/spark/v3.5/build.gradle +++ b/spark/v3.5/build.gradle @@ -56,7 +56,6 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") { implementation project(':iceberg-core') implementation project(':iceberg-data') implementation project(':iceberg-orc') - implementation project(':iceberg-parquet') implementation project(':iceberg-arrow') implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}") implementation("org.apache.datasketches:datasketches-java:${libs.versions.datasketches.get()}") @@ -163,7 +162,6 @@ project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVer compileOnly libs.errorprone.annotations testImplementation project(path: ':iceberg-data') - testImplementation project(path: ':iceberg-parquet') testImplementation project(path: ':iceberg-hive-metastore') testImplementation 
project(path: ':iceberg-api', configuration: 'testArtifacts') testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')