Skip to content

Commit

Permalink
Core: Move iceberg-parquet files to iceberg-core
Browse files Browse the repository at this point in the history
- Since the core module needs to write stats in Parquet format, to avoid a circular dependency,
move all the files from the iceberg-parquet module to iceberg-core.
- `TestParquetReadProjection` used to duplicate the test code of iceberg-api module's `TestReadProjection`.
Removed the duplicate class and instead directly extend the original class from iceberg-api module.
- Update `TestParquetReadProjection` to skip the empty-struct test cases, as only the Avro readers support them.
The testcases are now common for both Avro and Parquet readers.
  • Loading branch information
ajantha-bhat committed Jan 29, 2025
1 parent e89798e commit a8f416f
Show file tree
Hide file tree
Showing 74 changed files with 12 additions and 53 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ Iceberg table support is organized in library modules:
* `iceberg-common` contains utility classes used in other modules
* `iceberg-api` contains the public Iceberg API
* `iceberg-core` contains implementations of the Iceberg API and support for Avro data files, **this is what processing engines should depend on**
* `iceberg-parquet` is an optional module for working with tables backed by Parquet files
* `iceberg-arrow` is an optional module for reading Parquet into Arrow memory
* `iceberg-orc` is an optional module for working with tables backed by ORC files
* `iceberg-hive-metastore` is an implementation of Iceberg tables backed by the Hive metastore Thrift client
Expand Down
45 changes: 12 additions & 33 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ if (file("${rootDir}/iceberg-build.properties").exists()) {
}

def projectVersion = getProjectVersion()
final REVAPI_PROJECTS = ["iceberg-api", "iceberg-core", "iceberg-parquet", "iceberg-orc", "iceberg-common", "iceberg-data"]
final REVAPI_PROJECTS = ["iceberg-api", "iceberg-core", "iceberg-orc", "iceberg-common", "iceberg-data"]

allprojects {
group = "org.apache.iceberg"
Expand Down Expand Up @@ -157,7 +157,7 @@ subprojects {
rootTask.finalizedBy showDeprecationRulesOnRevApiFailure
}
}

tasks.named("revapiAnalyze").configure {
dependsOn(":iceberg-common:jar")
}
Expand Down Expand Up @@ -353,6 +353,13 @@ project(':iceberg-core') {
exclude group: 'org.slf4j', module: 'slf4j-log4j12'
}

implementation(libs.parquet.avro) {
exclude group: 'org.apache.avro', module: 'avro'
// already shaded by Parquet
exclude group: 'it.unimi.dsi'
exclude group: 'org.codehaus.jackson'
}

testImplementation libs.jetty.servlet
testImplementation libs.jakarta.servlet
testImplementation libs.jetty.server
Expand All @@ -371,7 +378,6 @@ project(':iceberg-data') {
implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow')
api project(':iceberg-api')
implementation project(':iceberg-core')
compileOnly project(':iceberg-parquet')
compileOnly project(':iceberg-orc')
compileOnly(libs.hadoop2.common) {
exclude group: 'commons-beanutils'
Expand Down Expand Up @@ -564,7 +570,6 @@ project(':iceberg-delta-lake') {
api project(':iceberg-api')
implementation project(':iceberg-common')
implementation project(':iceberg-core')
implementation project(':iceberg-parquet')
implementation platform(libs.jackson.bom)
implementation libs.jackson.databind
annotationProcessor libs.immutables.value
Expand Down Expand Up @@ -770,33 +775,6 @@ project(':iceberg-orc') {
}
}

project(':iceberg-parquet') {
test {
useJUnitPlatform()
}
dependencies {
implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow')
api project(':iceberg-api')
implementation project(':iceberg-core')
implementation project(':iceberg-common')

implementation(libs.parquet.avro) {
exclude group: 'org.apache.avro', module: 'avro'
// already shaded by Parquet
exclude group: 'it.unimi.dsi'
exclude group: 'org.codehaus.jackson'
}

compileOnly libs.avro.avro
compileOnly(libs.hadoop2.client) {
exclude group: 'org.apache.avro', module: 'avro'
}

testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')
}
}

project(':iceberg-arrow') {
test {
useJUnitPlatform()
Expand All @@ -805,7 +783,6 @@ project(':iceberg-arrow') {
implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow')
api project(':iceberg-api')
implementation project(':iceberg-core')
implementation project(':iceberg-parquet')

implementation(libs.arrow.vector) {
exclude group: 'io.netty', module: 'netty-buffer'
Expand Down Expand Up @@ -897,7 +874,9 @@ project(':iceberg-snowflake') {
}

dependencies {
implementation project(':iceberg-core')
implementation(project(':iceberg-core')) {
exclude group: 'com.github.luben', module: 'zstd-jni'
}
implementation project(':iceberg-common')
implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow')
implementation platform(libs.jackson.bom)
Expand Down
1 change: 0 additions & 1 deletion docs/docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ Iceberg table support is organized in library modules:
* `iceberg-arrow` is an implementation of the Iceberg type system for reading and writing data stored in Iceberg tables using Apache Arrow as the in-memory data format
* `iceberg-aws` contains implementations of the Iceberg API to be used with tables stored on AWS S3 and/or for tables defined using the AWS Glue data catalog
* `iceberg-core` contains implementations of the Iceberg API and support for Avro data files, **this is what processing engines should depend on**
* `iceberg-parquet` is an optional module for working with tables backed by Parquet files
* `iceberg-orc` is an optional module for working with tables backed by ORC files (*experimental*)
* `iceberg-hive-metastore` is an implementation of Iceberg tables backed by the Hive metastore Thrift client

Expand Down
1 change: 0 additions & 1 deletion flink/v1.18/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
implementation project(':iceberg-core')
api project(':iceberg-data')
implementation project(':iceberg-orc')
implementation project(':iceberg-parquet')
implementation project(':iceberg-hive-metastore')

compileOnly libs.flink118.avro
Expand Down
1 change: 0 additions & 1 deletion flink/v1.19/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
implementation project(':iceberg-core')
api project(':iceberg-data')
implementation project(':iceberg-orc')
implementation project(':iceberg-parquet')
implementation project(':iceberg-hive-metastore')

compileOnly libs.flink119.avro
Expand Down
1 change: 0 additions & 1 deletion flink/v1.20/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") {
implementation project(':iceberg-core')
api project(':iceberg-data')
implementation project(':iceberg-orc')
implementation project(':iceberg-parquet')
implementation project(':iceberg-hive-metastore')

compileOnly libs.flink120.avro
Expand Down
2 changes: 0 additions & 2 deletions kafka-connect/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ project(':iceberg-kafka-connect:iceberg-kafka-connect') {
compileOnly libs.kafka.connect.json

testImplementation libs.hadoop3.client
testRuntimeOnly project(':iceberg-parquet')
testRuntimeOnly project(':iceberg-orc')
}

Expand Down Expand Up @@ -108,7 +107,6 @@ project(':iceberg-kafka-connect:iceberg-kafka-connect-runtime') {
exclude group: 'org.eclipse.jetty'
}
implementation project(':iceberg-orc')
implementation project(':iceberg-parquet')

implementation project(':iceberg-aws')
implementation platform(libs.awssdk.bom)
Expand Down
1 change: 0 additions & 1 deletion mr/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ project(':iceberg-mr') {
api project(':iceberg-data')
implementation project(':iceberg-hive-metastore')
implementation project(':iceberg-orc')
implementation project(':iceberg-parquet')

compileOnly(libs.hadoop2.client) {
exclude group: 'org.apache.avro', module: 'avro'
Expand Down
2 changes: 0 additions & 2 deletions settings.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ include 'azure'
include 'azure-bundle'
include 'orc'
include 'arrow'
include 'parquet'
include 'bundled-guava'
include 'spark'
include 'hive-metastore'
Expand All @@ -55,7 +54,6 @@ project(':azure').name = 'iceberg-azure'
project(':azure-bundle').name = 'iceberg-azure-bundle'
project(':orc').name = 'iceberg-orc'
project(':arrow').name = 'iceberg-arrow'
project(':parquet').name = 'iceberg-parquet'
project(':bundled-guava').name = 'iceberg-bundled-guava'
project(':spark').name = 'iceberg-spark'
project(':hive-metastore').name = 'iceberg-hive-metastore'
Expand Down
2 changes: 0 additions & 2 deletions site/docs/contribute.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ Iceberg table support is organized in library modules:
* `iceberg-common` contains utility classes used in other modules
* `iceberg-api` contains the public Iceberg API
* `iceberg-core` contains implementations of the Iceberg API and support for Avro data files, **this is what processing engines should depend on**
* `iceberg-parquet` is an optional module for working with tables backed by Parquet files
* `iceberg-arrow` is an optional module for reading Parquet into Arrow memory
* `iceberg-orc` is an optional module for working with tables backed by ORC files
* `iceberg-hive-metastore` is an implementation of Iceberg tables backed by the Hive metastore Thrift client
Expand Down Expand Up @@ -155,7 +154,6 @@ __Modules__
`iceberg-core`
`iceberg-data`
`iceberg-orc`
`iceberg-parquet`

Changes to public interfaces and classes in the subprojects listed above require a deprecation cycle of one minor
release.
Expand Down
2 changes: 0 additions & 2 deletions site/docs/releases.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ dependencies {
}
```

You may also want to include `iceberg-parquet` for Parquet file support.

### Maven

To add a dependency on Iceberg in Maven, add the following to your `pom.xml`:
Expand Down
2 changes: 0 additions & 2 deletions spark/v3.3/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
implementation project(':iceberg-core')
implementation project(':iceberg-data')
implementation project(':iceberg-orc')
implementation project(':iceberg-parquet')
implementation project(':iceberg-arrow')
implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}")

Expand Down Expand Up @@ -157,7 +156,6 @@ project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVer
compileOnly libs.errorprone.annotations

testImplementation project(path: ':iceberg-data')
testImplementation project(path: ':iceberg-parquet')
testImplementation project(path: ':iceberg-hive-metastore')
testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
Expand Down
2 changes: 0 additions & 2 deletions spark/v3.4/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
implementation project(':iceberg-core')
implementation project(':iceberg-data')
implementation project(':iceberg-orc')
implementation project(':iceberg-parquet')
implementation project(':iceberg-arrow')
implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}")
implementation("org.apache.datasketches:datasketches-java:${libs.versions.datasketches.get()}")
Expand Down Expand Up @@ -164,7 +163,6 @@ project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVer
compileOnly libs.errorprone.annotations

testImplementation project(path: ':iceberg-data')
testImplementation project(path: ':iceberg-parquet')
testImplementation project(path: ':iceberg-hive-metastore')
testImplementation project(path: ':iceberg-data', configuration: 'testArtifacts')
testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
Expand Down
2 changes: 0 additions & 2 deletions spark/v3.5/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
implementation project(':iceberg-core')
implementation project(':iceberg-data')
implementation project(':iceberg-orc')
implementation project(':iceberg-parquet')
implementation project(':iceberg-arrow')
implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}")
implementation("org.apache.datasketches:datasketches-java:${libs.versions.datasketches.get()}")
Expand Down Expand Up @@ -163,7 +162,6 @@ project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVer
compileOnly libs.errorprone.annotations

testImplementation project(path: ':iceberg-data')
testImplementation project(path: ':iceberg-parquet')
testImplementation project(path: ':iceberg-hive-metastore')
testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')
Expand Down

0 comments on commit a8f416f

Please sign in to comment.