From 7f7d4cb5ce0f101726a25aba30decbadcae19783 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Fri, 10 Nov 2023 15:18:09 -0800 Subject: [PATCH 01/28] Consolidate delta-lake poms Signed-off-by: Gera Shegalov --- delta-lake/common/pom.xml | 87 +++++++++++++++++++++++++++++ delta-lake/delta-20x/pom.xml | 40 +------------ delta-lake/delta-21x/pom.xml | 40 +------------ delta-lake/delta-22x/pom.xml | 40 +------------ delta-lake/delta-23x/pom.xml | 42 +------------- delta-lake/delta-24x/pom.xml | 40 +------------ delta-lake/delta-spark321db/pom.xml | 36 +----------- delta-lake/delta-spark330db/pom.xml | 36 +----------- delta-lake/delta-spark332db/pom.xml | 36 +----------- pom.xml | 48 ++++++++++------ 10 files changed, 133 insertions(+), 312 deletions(-) create mode 100644 delta-lake/common/pom.xml diff --git a/delta-lake/common/pom.xml b/delta-lake/common/pom.xml new file mode 100644 index 00000000000..ced0a356789 --- /dev/null +++ b/delta-lake/common/pom.xml @@ -0,0 +1,87 @@ + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-jdk-profiles_2.12 + 23.12.0-SNAPSHOT + ../../jdk-profiles/pom.xml + + + rapids-4-spark-delta-common_2.12 + Delta Lake Parent for the RAPIDS Accelerator for Apache Spark + 23.12.0-SNAPSHOT + pom + + + ../delta-lake/common + false + **/* + package + + + + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + + + org.apache.spark + spark-sql_${scala.binary.version} + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/delta-io/scala + + + + + + + + + org.apache.rat + apache-rat-plugin + + + + diff --git a/delta-lake/delta-20x/pom.xml b/delta-lake/delta-20x/pom.xml index 5cb0e2e2e4e..0689c2825af 100644 --- a/delta-lake/delta-20x/pom.xml +++ b/delta-lake/delta-20x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.12 + 
rapids-4-spark-delta-common_2.12 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-20x_2.12 @@ -39,52 +39,16 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.0.1 provided - - org.apache.spark - spark-sql_${scala.binary.version} - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - ${project.basedir}/../common/src/main/scala - ${project.basedir}/../common/src/main/delta-io/scala - - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-21x/pom.xml b/delta-lake/delta-21x/pom.xml index 5b4e1225722..85aa0839e2f 100644 --- a/delta-lake/delta-21x/pom.xml +++ b/delta-lake/delta-21x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.12 + rapids-4-spark-delta-common_2.12 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-21x_2.12 @@ -39,52 +39,16 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.1.1 provided - - org.apache.spark - spark-sql_${scala.binary.version} - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - ${project.basedir}/../common/src/main/scala - ${project.basedir}/../common/src/main/delta-io/scala - - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-22x/pom.xml b/delta-lake/delta-22x/pom.xml index 0b6d2175f2f..d63ba2ef3c7 100644 --- a/delta-lake/delta-22x/pom.xml +++ b/delta-lake/delta-22x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.12 + rapids-4-spark-delta-common_2.12 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-22x_2.12 @@ -39,52 +39,16 @@ - - com.nvidia - 
rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.2.0 provided - - org.apache.spark - spark-sql_${scala.binary.version} - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - ${project.basedir}/../common/src/main/scala - ${project.basedir}/../common/src/main/delta-io/scala - - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-23x/pom.xml b/delta-lake/delta-23x/pom.xml index 9b8cb489cb6..b674ada5183 100644 --- a/delta-lake/delta-23x/pom.xml +++ b/delta-lake/delta-23x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-parent_2.12 + rapids-4-spark-delta-common_2.12 23.12.0-SNAPSHOT - ../../pom.xml + ../common/pom.xml rapids-4-spark-delta-23x_2.12 @@ -37,54 +37,16 @@ **/* package - - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.3.0 provided - - org.apache.spark - spark-sql_${scala.binary.version} - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - ${project.basedir}/../common/src/main/scala - ${project.basedir}/../common/src/main/delta-io/scala - - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-24x/pom.xml b/delta-lake/delta-24x/pom.xml index 93f625397bf..a0bec07dc87 100644 --- a/delta-lake/delta-24x/pom.xml +++ b/delta-lake/delta-24x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.12 + rapids-4-spark-delta-common_2.12 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-24x_2.12 @@ -39,52 +39,16 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.4.0 provided - - 
org.apache.spark - spark-sql_${scala.binary.version} - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - ${project.basedir}/../common/src/main/scala - ${project.basedir}/../common/src/main/delta-io/scala - - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-spark321db/pom.xml b/delta-lake/delta-spark321db/pom.xml index 95f9146f51a..863101e22ba 100644 --- a/delta-lake/delta-spark321db/pom.xml +++ b/delta-lake/delta-spark321db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.12 + rapids-4-spark-delta-common_2.12 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-spark321db_2.12 @@ -39,13 +39,6 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - com.nvidia rapids-4-spark-db-bom @@ -57,31 +50,6 @@ - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - ${project.basedir}/../common/src/main/scala - ${project.basedir}/../common/src/main/databricks/scala - - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-spark330db/pom.xml b/delta-lake/delta-spark330db/pom.xml index c8ed34bd539..8f4446d462b 100644 --- a/delta-lake/delta-spark330db/pom.xml +++ b/delta-lake/delta-spark330db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.12 + rapids-4-spark-delta-common_2.12 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-spark330db_2.12 @@ -39,13 +39,6 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - com.nvidia rapids-4-spark-db-bom @@ -57,31 +50,6 @@ - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - ${project.basedir}/../common/src/main/scala - 
${project.basedir}/../common/src/main/databricks/scala - - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-spark332db/pom.xml b/delta-lake/delta-spark332db/pom.xml index 1d81d63aa94..9cf1c1a682c 100644 --- a/delta-lake/delta-spark332db/pom.xml +++ b/delta-lake/delta-spark332db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.12 + rapids-4-spark-delta-common_2.12 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-spark332db_2.12 @@ -39,13 +39,6 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - com.nvidia rapids-4-spark-db-bom @@ -57,31 +50,6 @@ - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - ${project.basedir}/../common/src/main/scala - ${project.basedir}/../common/src/main/databricks/scala - - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/pom.xml b/pom.xml index afb519ffc03..7510a7f2716 100644 --- a/pom.xml +++ b/pom.xml @@ -80,6 +80,7 @@ sql-plugin-api tests udf-compiler + delta-lake/common - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + From 91e718ad89710f3ddca4536d19766172afe92ea8 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Fri, 10 Nov 2023 16:01:51 -0800 Subject: [PATCH 02/28] skip build-info if revision unchanged --- pom.xml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 7510a7f2716..d0ad9864493 100644 --- a/pom.xml +++ b/pom.xml @@ -976,12 +976,21 @@ + + + + + - + + Regenerating build info properties because head revision changed: + previous=${saved.build-info.revision} + current=${git.head.revision} + - + From afb81b2f45ab6c3333d3c873cf90cf1e7599770f Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Fri, 10 Nov 2023 16:22:31 -0800 Subject: [PATCH 03/28] scala2.13 Signed-off-by: Gera Shegalov --- 
scala2.13/delta-lake/delta-20x/pom.xml | 40 +------------- scala2.13/delta-lake/delta-21x/pom.xml | 40 +------------- scala2.13/delta-lake/delta-22x/pom.xml | 40 +------------- scala2.13/delta-lake/delta-23x/pom.xml | 42 +------------- scala2.13/delta-lake/delta-24x/pom.xml | 40 +------------- scala2.13/delta-lake/delta-spark321db/pom.xml | 36 +----------- scala2.13/delta-lake/delta-spark330db/pom.xml | 36 +----------- scala2.13/delta-lake/delta-spark332db/pom.xml | 36 +----------- scala2.13/pom.xml | 55 +++++++++++++------ 9 files changed, 54 insertions(+), 311 deletions(-) diff --git a/scala2.13/delta-lake/delta-20x/pom.xml b/scala2.13/delta-lake/delta-20x/pom.xml index 688d0154734..f3c3d46ff58 100644 --- a/scala2.13/delta-lake/delta-20x/pom.xml +++ b/scala2.13/delta-lake/delta-20x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.13 + rapids-4-spark-delta-common_2.13 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-20x_2.13 @@ -39,52 +39,16 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.0.1 provided - - org.apache.spark - spark-sql_${scala.binary.version} - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - - ${project.basedir}/../../${rapids.module}/../common/src/main/scala - ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-21x/pom.xml b/scala2.13/delta-lake/delta-21x/pom.xml index 8a5b5d0b8f4..475c028a849 100644 --- a/scala2.13/delta-lake/delta-21x/pom.xml +++ b/scala2.13/delta-lake/delta-21x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.13 + rapids-4-spark-delta-common_2.13 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-21x_2.13 @@ 
-39,52 +39,16 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.1.1 provided - - org.apache.spark - spark-sql_${scala.binary.version} - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - - ${project.basedir}/../../${rapids.module}/../common/src/main/scala - ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-22x/pom.xml b/scala2.13/delta-lake/delta-22x/pom.xml index 58d417bb1ed..8c16e936b7f 100644 --- a/scala2.13/delta-lake/delta-22x/pom.xml +++ b/scala2.13/delta-lake/delta-22x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.13 + rapids-4-spark-delta-common_2.13 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-22x_2.13 @@ -39,52 +39,16 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.2.0 provided - - org.apache.spark - spark-sql_${scala.binary.version} - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - - ${project.basedir}/../../${rapids.module}/../common/src/main/scala - ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-23x/pom.xml b/scala2.13/delta-lake/delta-23x/pom.xml index 6193d34ab44..4327aecbb3a 100644 --- a/scala2.13/delta-lake/delta-23x/pom.xml +++ b/scala2.13/delta-lake/delta-23x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-parent_2.13 + rapids-4-spark-delta-common_2.13 23.12.0-SNAPSHOT - ../../pom.xml + ../common/pom.xml rapids-4-spark-delta-23x_2.13 @@ -37,54 +37,16 @@ **/* package 
- - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.3.0 provided - - org.apache.spark - spark-sql_${scala.binary.version} - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - - ${project.basedir}/../../${rapids.module}/../common/src/main/scala - ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-24x/pom.xml b/scala2.13/delta-lake/delta-24x/pom.xml index 6aa94f5a546..ed015698f7c 100644 --- a/scala2.13/delta-lake/delta-24x/pom.xml +++ b/scala2.13/delta-lake/delta-24x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.13 + rapids-4-spark-delta-common_2.13 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-24x_2.13 @@ -39,52 +39,16 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - io.delta delta-core_${scala.binary.version} 2.4.0 provided - - org.apache.spark - spark-sql_${scala.binary.version} - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - - ${project.basedir}/../../${rapids.module}/../common/src/main/scala - ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-spark321db/pom.xml b/scala2.13/delta-lake/delta-spark321db/pom.xml index c0c0bbc0385..326b3e478b3 100644 --- a/scala2.13/delta-lake/delta-spark321db/pom.xml +++ b/scala2.13/delta-lake/delta-spark321db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.13 + rapids-4-spark-delta-common_2.13 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml 
rapids-4-spark-delta-spark321db_2.13 @@ -39,13 +39,6 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - com.nvidia rapids-4-spark-db-bom @@ -57,31 +50,6 @@ - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - - ${project.basedir}/../../${rapids.module}/../common/src/main/scala - ${project.basedir}/../../${rapids.module}/../common/src/main/databricks/scala - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-spark330db/pom.xml b/scala2.13/delta-lake/delta-spark330db/pom.xml index 9ba4fd9f742..14537f6ed84 100644 --- a/scala2.13/delta-lake/delta-spark330db/pom.xml +++ b/scala2.13/delta-lake/delta-spark330db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.13 + rapids-4-spark-delta-common_2.13 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-spark330db_2.13 @@ -39,13 +39,6 @@ - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - com.nvidia rapids-4-spark-db-bom @@ -57,31 +50,6 @@ - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - - ${project.basedir}/../../${rapids.module}/../common/src/main/scala - ${project.basedir}/../../${rapids.module}/../common/src/main/databricks/scala - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-spark332db/pom.xml b/scala2.13/delta-lake/delta-spark332db/pom.xml index 506e2d392c7..81adf2765b7 100644 --- a/scala2.13/delta-lake/delta-spark332db/pom.xml +++ b/scala2.13/delta-lake/delta-spark332db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-jdk-profiles_2.13 + rapids-4-spark-delta-common_2.13 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml + ../common/pom.xml rapids-4-spark-delta-spark332db_2.13 @@ -39,13 +39,6 @@ - - com.nvidia 
- rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - com.nvidia rapids-4-spark-db-bom @@ -57,31 +50,6 @@ - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - - ${project.basedir}/../../${rapids.module}/../common/src/main/scala - ${project.basedir}/../../${rapids.module}/../common/src/main/databricks/scala - - - - - - net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 629692d6e65..f4ed69d5246 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -80,6 +80,7 @@ sql-plugin-api tests udf-compiler + delta-lake/common - - - - - - + + + - - - - - - - + + + + + + + + Regenerating build info properties because head revision changed: + previous=${saved.build-info.revision} + current=${git.head.revision} + + + + + + + + + + + + + + + + + From da86cdac4a7b4db5fc3ef1c8cce1951f25c95d94 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sat, 11 Nov 2023 02:15:20 -0800 Subject: [PATCH 04/28] compile output as dependency Signed-off-by: Gera Shegalov --- pom.xml | 17 ++++++----------- scala2.13/pom.xml | 17 ++++++----------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/pom.xml b/pom.xml index d0ad9864493..94f7d9c285b 100644 --- a/pom.xml +++ b/pom.xml @@ -696,7 +696,7 @@ 3.3.2-databricks 3.5.0 3.12.4 - 4.3.0 + 4.8.0 3.1.1 3.3.0 2.0.2 @@ -1070,16 +1070,11 @@ org.apache.maven.plugins maven-compiler-plugin - - - default-compile - none - - - default-testCompile - none - - + 3.11.0 + + true + true + net.alchim31.maven diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index f4ed69d5246..4abd27df15c 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -696,7 +696,7 @@ 3.3.2-databricks 3.5.0 3.12.4 - 4.3.0 + 4.8.0 3.1.1 3.3.0 2.0.2 @@ -1070,16 +1070,11 @@ org.apache.maven.plugins maven-compiler-plugin - - - default-compile - none - - - default-testCompile - none - - + 3.11.0 + + true + true + 
net.alchim31.maven From 968421488b41cdd4039987dfac7e20ebf0a4bb86 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sat, 11 Nov 2023 02:20:07 -0800 Subject: [PATCH 05/28] scala2.13 common Signed-off-by: Gera Shegalov --- scala2.13/delta-lake/common/pom.xml | 87 +++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 scala2.13/delta-lake/common/pom.xml diff --git a/scala2.13/delta-lake/common/pom.xml b/scala2.13/delta-lake/common/pom.xml new file mode 100644 index 00000000000..bfb332ffc9f --- /dev/null +++ b/scala2.13/delta-lake/common/pom.xml @@ -0,0 +1,87 @@ + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-jdk-profiles_2.13 + 23.12.0-SNAPSHOT + ../../jdk-profiles/pom.xml + + + rapids-4-spark-delta-common_2.13 + Delta Lake Parent for the RAPIDS Accelerator for Apache Spark + 23.12.0-SNAPSHOT + pom + + + ../delta-lake/common + false + **/* + package + + + + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + + + org.apache.spark + spark-sql_${scala.binary.version} + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + + ${project.basedir}/../../${rapids.module}/../common/src/main/scala + ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala + + + + + + + + org.apache.rat + apache-rat-plugin + + + + From f6dd0e58e404da91bfbfa6a9816352606b2525f2 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sat, 11 Nov 2023 03:54:16 -0800 Subject: [PATCH 06/28] revert to 4.3.0 Signed-off-by: Gera Shegalov --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 94f7d9c285b..bdb8b77a91e 100644 --- a/pom.xml +++ b/pom.xml @@ -1134,8 +1134,8 @@ -Xfatal-warnings -Wconf:cat=lint-adapted-args:e - -Xsource:2.13 -Wconf:cat=lint-adapted-args:e - -Xsource:2.13 + -Xsource:2.13 -Ywarn-unused:locals,patvars,privates -Wconf:cat=deprecation:wv,any:e -Wconf:cat=scaladoc:wv 
From fd8bbd98440254063b2c1a5b6c9a1aff911a668a Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sat, 11 Nov 2023 17:17:46 -0800 Subject: [PATCH 08/28] downgrade scala-maven-plugin Signed-off-by: Gera Shegalov --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index bdb8b77a91e..1abb775d1a1 100644 --- a/pom.xml +++ b/pom.xml @@ -696,7 +696,7 @@ 3.3.2-databricks 3.5.0 3.12.4 - 4.8.0 + 4.3.0 3.1.1 3.3.0 2.0.2 From f79d88b5d08c70f4ad082cf16098eebc0a9f3f20 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sun, 12 Nov 2023 13:52:41 -0800 Subject: [PATCH 09/28] incremental compile Signed-off-by: Gera Shegalov --- integration_tests/src/assembly/bin.xml | 2 +- pom.xml | 26 +++++++++++++++--------- sql-plugin/pom.xml | 28 ++++++++++++++++++-------- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index c992b073eae..6209d0b152a 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -47,7 +47,7 @@ integration_tests - ${project.build.directory}/extra-resources/rapids4spark-version-info.properties + ${project.build.outputDirectory}/rapids4spark-version-info.properties integration_tests diff --git a/pom.xml b/pom.xml index 1abb775d1a1..2f37dab0d51 100644 --- a/pom.xml +++ b/pom.xml @@ -822,7 +822,7 @@ false install ${spark.rapids.source.basedir}/.bloop - ${project.build.directory}/extra-resources/rapids4spark-version-info.properties + ${project.build.outputDirectory}/rapids4spark-version-info.properties @@ -981,18 +981,24 @@ + +Comparing git revisions: + previous=${saved.build-info.revision} + current=${git.head.revision} + - - - + - Regenerating build info properties because head revision changed: - previous=${saved.build-info.revision} - current=${git.head.revision} + +Git revisions unchanged: skipping version info file generation. 
+Delete ${build.info.path} or mvn clean if regeneration desired. +This will force full Scala code rebuild in downstream modules. - - + + + Generating new version info file + - + diff --git a/sql-plugin/pom.xml b/sql-plugin/pom.xml index 9d752b57f8d..39feb85a8d5 100644 --- a/sql-plugin/pom.xml +++ b/sql-plugin/pom.xml @@ -154,7 +154,7 @@ @@ -171,13 +171,25 @@ run - - - - - + + + + + + + + Skipping shim service file generation, already exists + + + + + + + From 868e36b18e33c574e37df06f5c37e11a346b5c2a Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sun, 12 Nov 2023 22:29:54 -0800 Subject: [PATCH 10/28] incremental end-to-end Signed-off-by: Gera Shegalov --- aggregator-tmp/pom.xml | 93 ++++++++++++++++++++++++++++++++++++++++++ aggregator/pom.xml | 11 +++++ pom.xml | 1 + tests/pom.xml | 2 +- 4 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 aggregator-tmp/pom.xml diff --git a/aggregator-tmp/pom.xml b/aggregator-tmp/pom.xml new file mode 100644 index 00000000000..834e8584817 --- /dev/null +++ b/aggregator-tmp/pom.xml @@ -0,0 +1,93 @@ + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-jdk-profiles_2.12 + 23.12.0-SNAPSHOT + ../jdk-profiles/pom.xml + + rapids-4-spark-aggregator-tmp_2.12 + 23.12.0-SNAPSHOT + + + false + **/* + aggregator-tmp + package + none + + + + com.nvidia + rapids-4-spark-aggregator_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + + + + + + org.apache.maven.plugins + maven-antrun-plugin + + + get-real-aggregator-if-changed + run + compile + + + + + + + + + + + Checksums ${realAggJarMD5} ${testAggJarMD5} + + + + Aggregator jar unchanged + + + Aggregator jar changed + + + + + + Tests aggregator jar missing .. 
recreating + + + + + + + + + + + diff --git a/aggregator/pom.xml b/aggregator/pom.xml index f2fc06a370f..d247c15204e 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -167,6 +167,17 @@ + + org.apache.maven.plugins + maven-antrun-plugin + + + promote-if-checksum-changed + post-package + run + + + diff --git a/pom.xml b/pom.xml index 2f37dab0d51..cf6ea588b22 100644 --- a/pom.xml +++ b/pom.xml @@ -72,6 +72,7 @@ aggregator + aggregator-tmp datagen dist integration_tests diff --git a/tests/pom.xml b/tests/pom.xml index 1dc1807c887..3c5e5f5aff8 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -67,7 +67,7 @@ com.nvidia - rapids-4-spark-aggregator_${scala.binary.version} + rapids-4-spark-aggregator-tmp_${scala.binary.version} ${project.version} ${spark.version.classifier} test From ca299c8c4f9ef5fc8a4aa214018624a7d3928c91 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Mon, 13 Nov 2023 11:10:43 -0800 Subject: [PATCH 11/28] may achieve with a single aggregate module Signed-off-by: Gera Shegalov --- aggregator/build/expected-jar-metadata.diff | Bin 0 -> 516 bytes aggregator/pom.xml | 60 +++++++++++++++----- 2 files changed, 47 insertions(+), 13 deletions(-) create mode 100644 aggregator/build/expected-jar-metadata.diff diff --git a/aggregator/build/expected-jar-metadata.diff b/aggregator/build/expected-jar-metadata.diff new file mode 100644 index 0000000000000000000000000000000000000000..bcf454fc9b9731f8b60d5218e62502399f4b8050 GIT binary patch literal 516 zcmXrgF-|tpG3K&SU|`^2VCeA64QD_CoItj(Ylx$+r=OdCfHyM>ib8h7#vq^q5EemJ z=E;?7qUY=8o0ylGmRek*8com.nvidia.shaded.spark false none + **/* + initialize + package @@ -73,7 +76,6 @@ maven-shade-plugin true - ${spark.version.classifier} org.slf4j:* @@ -108,13 +110,56 @@ main-${spark.version.classifier} - package + compile shade + + org.apache.maven.plugins + maven-antrun-plugin + + + create-aggregator-for-downstream-if-md5-changed + run + process-classes + + + + + + Checking if need to recreate: ${aggJarForDownstream} + 
+ + + + + + + + + + + + + Aggregator jar unchanged + + + Recreating final jar + + + + + + + + + + org.jacoco jacoco-maven-plugin @@ -167,17 +212,6 @@ - - org.apache.maven.plugins - maven-antrun-plugin - - - promote-if-checksum-changed - post-package - run - - - From 2a35697a3bc562471833cda61680eaf8ee4cec24 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Mon, 13 Nov 2023 16:00:59 -0800 Subject: [PATCH 12/28] remove aggrgator-tmp --- aggregator-tmp/pom.xml | 93 ------------------- aggregator/pom.xml | 28 +++--- pom.xml | 1 - .../com/nvidia/spark/rapids/ShimLoader.scala | 1 + tests/pom.xml | 2 +- 5 files changed, 19 insertions(+), 106 deletions(-) delete mode 100644 aggregator-tmp/pom.xml diff --git a/aggregator-tmp/pom.xml b/aggregator-tmp/pom.xml deleted file mode 100644 index 834e8584817..00000000000 --- a/aggregator-tmp/pom.xml +++ /dev/null @@ -1,93 +0,0 @@ - - - - 4.0.0 - - - com.nvidia - rapids-4-spark-jdk-profiles_2.12 - 23.12.0-SNAPSHOT - ../jdk-profiles/pom.xml - - rapids-4-spark-aggregator-tmp_2.12 - 23.12.0-SNAPSHOT - - - false - **/* - aggregator-tmp - package - none - - - - com.nvidia - rapids-4-spark-aggregator_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - provided - - - - - - org.apache.maven.plugins - maven-antrun-plugin - - - get-real-aggregator-if-changed - run - compile - - - - - - - - - - - Checksums ${realAggJarMD5} ${testAggJarMD5} - - - - Aggregator jar unchanged - - - Aggregator jar changed - - - - - - Tests aggregator jar missing .. 
recreating - - - - - - - - - - - diff --git a/aggregator/pom.xml b/aggregator/pom.xml index 990405aa96b..0cb43250ee2 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -121,6 +121,10 @@ org.apache.maven.plugins maven-antrun-plugin + + generate-build-info + generate-test-sources + create-aggregator-for-downstream-if-md5-changed run @@ -132,27 +136,29 @@ value="${project.build.outputDirectory}/../${project.build.finalName}-shaded.jar"/> + Checking if need to recreate: ${aggJarForDownstream} - - - - - - - - + + + + + + - + Aggregator jar unchanged Recreating final jar - + - + diff --git a/pom.xml b/pom.xml index cf6ea588b22..2f37dab0d51 100644 --- a/pom.xml +++ b/pom.xml @@ -72,7 +72,6 @@ aggregator - aggregator-tmp datagen dist integration_tests diff --git a/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala b/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala index 3723575810b..0e8116fa9fd 100644 --- a/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala +++ b/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala @@ -203,6 +203,7 @@ object ShimLoader extends Logging { private def detectShimProvider(): String = { val sparkVersion = getSparkVersion logInfo(s"Loading shim for Spark version: $sparkVersion") + logInfo(s"Loading shim for Spark version: $sparkVersion") logInfo("Complete Spark build info: " + sparkBuildInfo.mkString(", ")) logInfo("Scala version: " + util.Properties.versionString) diff --git a/tests/pom.xml b/tests/pom.xml index 3c5e5f5aff8..1dc1807c887 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -67,7 +67,7 @@ com.nvidia - rapids-4-spark-aggregator-tmp_${scala.binary.version} + rapids-4-spark-aggregator_${scala.binary.version} ${project.version} ${spark.version.classifier} test From 8ea3f768860c517ca683095ed9c94eb77683a1ac Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Mon, 13 Nov 2023 17:14:28 -0800 Subject: [PATCH 13/28] delete diff --- 
aggregator/build/expected-jar-metadata.diff | Bin 516 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 aggregator/build/expected-jar-metadata.diff diff --git a/aggregator/build/expected-jar-metadata.diff b/aggregator/build/expected-jar-metadata.diff deleted file mode 100644 index bcf454fc9b9731f8b60d5218e62502399f4b8050..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 516 zcmXrgF-|tpG3K&SU|`^2VCeA64QD_CoItj(Ylx$+r=OdCfHyM>ib8h7#vq^q5EemJ z=E;?7qUY=8o0ylGmRek*8 Date: Mon, 13 Nov 2023 17:53:33 -0800 Subject: [PATCH 14/28] Revert delta-lake changes --- delta-lake/common/pom.xml | 87 ----------------------------- delta-lake/delta-20x/pom.xml | 40 ++++++++++++- delta-lake/delta-21x/pom.xml | 40 ++++++++++++- delta-lake/delta-22x/pom.xml | 40 ++++++++++++- delta-lake/delta-23x/pom.xml | 42 +++++++++++++- delta-lake/delta-24x/pom.xml | 40 ++++++++++++- delta-lake/delta-spark321db/pom.xml | 36 +++++++++++- delta-lake/delta-spark330db/pom.xml | 36 +++++++++++- delta-lake/delta-spark332db/pom.xml | 36 +++++++++++- pom.xml | 43 +++++++------- 10 files changed, 314 insertions(+), 126 deletions(-) delete mode 100644 delta-lake/common/pom.xml diff --git a/delta-lake/common/pom.xml b/delta-lake/common/pom.xml deleted file mode 100644 index ced0a356789..00000000000 --- a/delta-lake/common/pom.xml +++ /dev/null @@ -1,87 +0,0 @@ - - - - 4.0.0 - - - com.nvidia - rapids-4-spark-jdk-profiles_2.12 - 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml - - - rapids-4-spark-delta-common_2.12 - Delta Lake Parent for the RAPIDS Accelerator for Apache Spark - 23.12.0-SNAPSHOT - pom - - - ../delta-lake/common - false - **/* - package - - - - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - org.apache.spark - spark-sql_${scala.binary.version} - - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - 
- ${project.basedir}/../common/src/main/scala - ${project.basedir}/../common/src/main/delta-io/scala - - - - - - - - - org.apache.rat - apache-rat-plugin - - - - diff --git a/delta-lake/delta-20x/pom.xml b/delta-lake/delta-20x/pom.xml index 0689c2825af..5cb0e2e2e4e 100644 --- a/delta-lake/delta-20x/pom.xml +++ b/delta-lake/delta-20x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.12 + rapids-4-spark-jdk-profiles_2.12 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-20x_2.12 @@ -39,16 +39,52 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.0.1 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/delta-io/scala + + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-21x/pom.xml b/delta-lake/delta-21x/pom.xml index 85aa0839e2f..5b4e1225722 100644 --- a/delta-lake/delta-21x/pom.xml +++ b/delta-lake/delta-21x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.12 + rapids-4-spark-jdk-profiles_2.12 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-21x_2.12 @@ -39,16 +39,52 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.1.1 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/delta-io/scala + + + + + + + net.alchim31.maven scala-maven-plugin diff --git 
a/delta-lake/delta-22x/pom.xml b/delta-lake/delta-22x/pom.xml index d63ba2ef3c7..0b6d2175f2f 100644 --- a/delta-lake/delta-22x/pom.xml +++ b/delta-lake/delta-22x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.12 + rapids-4-spark-jdk-profiles_2.12 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-22x_2.12 @@ -39,16 +39,52 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.2.0 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/delta-io/scala + + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-23x/pom.xml b/delta-lake/delta-23x/pom.xml index b674ada5183..9b8cb489cb6 100644 --- a/delta-lake/delta-23x/pom.xml +++ b/delta-lake/delta-23x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.12 + rapids-4-spark-parent_2.12 23.12.0-SNAPSHOT - ../common/pom.xml + ../../pom.xml rapids-4-spark-delta-23x_2.12 @@ -37,16 +37,54 @@ **/* package + + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.3.0 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/delta-io/scala + + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-24x/pom.xml b/delta-lake/delta-24x/pom.xml index a0bec07dc87..93f625397bf 100644 --- a/delta-lake/delta-24x/pom.xml +++ b/delta-lake/delta-24x/pom.xml @@ -21,9 +21,9 
@@ com.nvidia - rapids-4-spark-delta-common_2.12 + rapids-4-spark-jdk-profiles_2.12 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-24x_2.12 @@ -39,16 +39,52 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.4.0 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/delta-io/scala + + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-spark321db/pom.xml b/delta-lake/delta-spark321db/pom.xml index 863101e22ba..95f9146f51a 100644 --- a/delta-lake/delta-spark321db/pom.xml +++ b/delta-lake/delta-spark321db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.12 + rapids-4-spark-jdk-profiles_2.12 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark321db_2.12 @@ -39,6 +39,13 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + com.nvidia rapids-4-spark-db-bom @@ -50,6 +57,31 @@ + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/databricks/scala + + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-spark330db/pom.xml b/delta-lake/delta-spark330db/pom.xml index 8f4446d462b..c8ed34bd539 100644 --- a/delta-lake/delta-spark330db/pom.xml +++ b/delta-lake/delta-spark330db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.12 + rapids-4-spark-jdk-profiles_2.12 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml 
rapids-4-spark-delta-spark330db_2.12 @@ -39,6 +39,13 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + com.nvidia rapids-4-spark-db-bom @@ -50,6 +57,31 @@ + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/databricks/scala + + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/delta-lake/delta-spark332db/pom.xml b/delta-lake/delta-spark332db/pom.xml index 9cf1c1a682c..1d81d63aa94 100644 --- a/delta-lake/delta-spark332db/pom.xml +++ b/delta-lake/delta-spark332db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.12 + rapids-4-spark-jdk-profiles_2.12 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark332db_2.12 @@ -39,6 +39,13 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + com.nvidia rapids-4-spark-db-bom @@ -50,6 +57,31 @@ + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/databricks/scala + + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/pom.xml b/pom.xml index 2f37dab0d51..51a091d04e2 100644 --- a/pom.xml +++ b/pom.xml @@ -80,7 +80,6 @@ sql-plugin-api tests udf-compiler - delta-lake/common + + + + + + + + + + + Aggregator jar unchanged + + + Recreating final jar + + + + + + + + + + org.jacoco jacoco-maven-plugin diff --git a/scala2.13/delta-lake/delta-20x/pom.xml b/scala2.13/delta-lake/delta-20x/pom.xml index f3c3d46ff58..688d0154734 100644 --- a/scala2.13/delta-lake/delta-20x/pom.xml +++ b/scala2.13/delta-lake/delta-20x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.13 + rapids-4-spark-jdk-profiles_2.13 
23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-20x_2.13 @@ -39,16 +39,52 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.0.1 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + + ${project.basedir}/../../${rapids.module}/../common/src/main/scala + ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-21x/pom.xml b/scala2.13/delta-lake/delta-21x/pom.xml index 475c028a849..8a5b5d0b8f4 100644 --- a/scala2.13/delta-lake/delta-21x/pom.xml +++ b/scala2.13/delta-lake/delta-21x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.13 + rapids-4-spark-jdk-profiles_2.13 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-21x_2.13 @@ -39,16 +39,52 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.1.1 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + + ${project.basedir}/../../${rapids.module}/../common/src/main/scala + ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-22x/pom.xml b/scala2.13/delta-lake/delta-22x/pom.xml index 8c16e936b7f..58d417bb1ed 100644 --- a/scala2.13/delta-lake/delta-22x/pom.xml +++ b/scala2.13/delta-lake/delta-22x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.13 + rapids-4-spark-jdk-profiles_2.13 
23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-22x_2.13 @@ -39,16 +39,52 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.2.0 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + + ${project.basedir}/../../${rapids.module}/../common/src/main/scala + ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-23x/pom.xml b/scala2.13/delta-lake/delta-23x/pom.xml index 4327aecbb3a..6193d34ab44 100644 --- a/scala2.13/delta-lake/delta-23x/pom.xml +++ b/scala2.13/delta-lake/delta-23x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.13 + rapids-4-spark-parent_2.13 23.12.0-SNAPSHOT - ../common/pom.xml + ../../pom.xml rapids-4-spark-delta-23x_2.13 @@ -37,16 +37,54 @@ **/* package + + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.3.0 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + + ${project.basedir}/../../${rapids.module}/../common/src/main/scala + ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-24x/pom.xml b/scala2.13/delta-lake/delta-24x/pom.xml index ed015698f7c..6aa94f5a546 100644 --- a/scala2.13/delta-lake/delta-24x/pom.xml +++ b/scala2.13/delta-lake/delta-24x/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.13 + rapids-4-spark-jdk-profiles_2.13 
23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-24x_2.13 @@ -39,16 +39,52 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + io.delta delta-core_${scala.binary.version} 2.4.0 provided + + org.apache.spark + spark-sql_${scala.binary.version} + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + + ${project.basedir}/../../${rapids.module}/../common/src/main/scala + ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-spark321db/pom.xml b/scala2.13/delta-lake/delta-spark321db/pom.xml index 326b3e478b3..c0c0bbc0385 100644 --- a/scala2.13/delta-lake/delta-spark321db/pom.xml +++ b/scala2.13/delta-lake/delta-spark321db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.13 + rapids-4-spark-jdk-profiles_2.13 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark321db_2.13 @@ -39,6 +39,13 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + com.nvidia rapids-4-spark-db-bom @@ -50,6 +57,31 @@ + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + + ${project.basedir}/../../${rapids.module}/../common/src/main/scala + ${project.basedir}/../../${rapids.module}/../common/src/main/databricks/scala + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-spark330db/pom.xml b/scala2.13/delta-lake/delta-spark330db/pom.xml index 14537f6ed84..9ba4fd9f742 100644 --- a/scala2.13/delta-lake/delta-spark330db/pom.xml +++ b/scala2.13/delta-lake/delta-spark330db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.13 + rapids-4-spark-jdk-profiles_2.13 
23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark330db_2.13 @@ -39,6 +39,13 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + com.nvidia rapids-4-spark-db-bom @@ -50,6 +57,31 @@ + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + + ${project.basedir}/../../${rapids.module}/../common/src/main/scala + ${project.basedir}/../../${rapids.module}/../common/src/main/databricks/scala + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/delta-lake/delta-spark332db/pom.xml b/scala2.13/delta-lake/delta-spark332db/pom.xml index 81adf2765b7..506e2d392c7 100644 --- a/scala2.13/delta-lake/delta-spark332db/pom.xml +++ b/scala2.13/delta-lake/delta-spark332db/pom.xml @@ -21,9 +21,9 @@ com.nvidia - rapids-4-spark-delta-common_2.13 + rapids-4-spark-jdk-profiles_2.13 23.12.0-SNAPSHOT - ../common/pom.xml + ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark332db_2.13 @@ -39,6 +39,13 @@ + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + com.nvidia rapids-4-spark-db-bom @@ -50,6 +57,31 @@ + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + + + ${project.basedir}/../../${rapids.module}/../common/src/main/scala + ${project.basedir}/../../${rapids.module}/../common/src/main/databricks/scala + + + + + + net.alchim31.maven scala-maven-plugin diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index bab4a3b785e..ac0bac0a401 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -80,7 +80,6 @@ sql-plugin-api tests udf-compiler - delta-lake/common @@ -171,13 +171,25 @@ run - - - - - + + + + + + + + Skipping shim service file generation, already exists + + + + + + + From 56f6ecdceecec7801a3444aed3fe9ccb2d868907 Mon Sep 17 00:00:00 2001 From: Gera Shegalov 
Date: Mon, 13 Nov 2023 20:44:55 -0800 Subject: [PATCH 16/28] revert unneeded changes Signed-off-by: Gera Shegalov --- scala2.13/delta-lake/common/pom.xml | 87 ------------------- .../com/nvidia/spark/rapids/ShimLoader.scala | 1 - 2 files changed, 88 deletions(-) delete mode 100644 scala2.13/delta-lake/common/pom.xml diff --git a/scala2.13/delta-lake/common/pom.xml b/scala2.13/delta-lake/common/pom.xml deleted file mode 100644 index bfb332ffc9f..00000000000 --- a/scala2.13/delta-lake/common/pom.xml +++ /dev/null @@ -1,87 +0,0 @@ - - - - 4.0.0 - - - com.nvidia - rapids-4-spark-jdk-profiles_2.13 - 23.12.0-SNAPSHOT - ../../jdk-profiles/pom.xml - - - rapids-4-spark-delta-common_2.13 - Delta Lake Parent for the RAPIDS Accelerator for Apache Spark - 23.12.0-SNAPSHOT - pom - - - ../delta-lake/common - false - **/* - package - - - - - com.nvidia - rapids-4-spark-sql_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - org.apache.spark - spark-sql_${scala.binary.version} - - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-common-sources - generate-sources - - add-source - - - - - - ${project.basedir}/../../${rapids.module}/../common/src/main/scala - ${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala - - - - - - - - org.apache.rat - apache-rat-plugin - - - - diff --git a/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala b/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala index 0e8116fa9fd..3723575810b 100644 --- a/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala +++ b/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala @@ -203,7 +203,6 @@ object ShimLoader extends Logging { private def detectShimProvider(): String = { val sparkVersion = getSparkVersion logInfo(s"Loading shim for Spark version: $sparkVersion") - logInfo(s"Loading shim for Spark version: $sparkVersion") logInfo("Complete Spark build info: " + 
sparkBuildInfo.mkString(", ")) logInfo("Scala version: " + util.Properties.versionString) From 9cc11f28474ce8467b46629cd256fd8576721313 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Mon, 13 Nov 2023 20:48:34 -0800 Subject: [PATCH 17/28] change execution id Signed-off-by: Gera Shegalov --- aggregator/pom.xml | 2 +- scala2.13/aggregator/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aggregator/pom.xml b/aggregator/pom.xml index 0cb43250ee2..149a2a5ef24 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -126,7 +126,7 @@ generate-test-sources - create-aggregator-for-downstream-if-md5-changed + create-aggregator-for-downstream-if-content-changed run process-classes diff --git a/scala2.13/aggregator/pom.xml b/scala2.13/aggregator/pom.xml index 149ce251df0..a932bee7069 100644 --- a/scala2.13/aggregator/pom.xml +++ b/scala2.13/aggregator/pom.xml @@ -126,7 +126,7 @@ generate-test-sources - create-aggregator-for-downstream-if-md5-changed + create-aggregator-for-downstream-if-content-changed run process-classes From 7ea535a097da643143866126c04ae47555af4eb8 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Mon, 13 Nov 2023 21:17:45 -0800 Subject: [PATCH 18/28] Replace `skip` with `maven.scaladoc.skip` Follow up to #9615 Signed-off-by: Gera Shegalov --- .github/workflows/mvn-verify-check.yml | 2 +- CONTRIBUTING.md | 2 +- build/buildall | 4 ++-- jenkins/databricks/build.sh | 2 +- jenkins/spark-premerge-build.sh | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml index 0e9d7899502..cecbe488961 100644 --- a/.github/workflows/mvn-verify-check.yml +++ b/.github/workflows/mvn-verify-check.yml @@ -27,7 +27,7 @@ env: COMMON_MVN_FLAGS: >- -Ddist.jar.compress=false -DskipTests - -Dskip + -Dmaven.scaladoc.skip jobs: get-shim-versions-from-dist: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eea7fe81387..eb91c3bfe31 100644 --- a/CONTRIBUTING.md 
+++ b/CONTRIBUTING.md @@ -282,7 +282,7 @@ Before proceeding with importing spark-rapids into IDEA or switching to a differ profile, execute the install phase with the corresponding `buildver`, e.g. for Spark 3.4.0: ```bash - mvn clean install -Dbuildver=340 -Dskip -DskipTests + mvn clean install -Dbuildver=340 -Dmaven.scaladoc.skip -DskipTests ``` ##### Importing the project diff --git a/build/buildall b/build/buildall index a700acad539..356efa2d46d 100755 --- a/build/buildall +++ b/build/buildall @@ -262,7 +262,7 @@ function build_single_shim() { -DskipTests \ -Dbuildver="$BUILD_VER" \ -Drat.skip="$SKIP_CHECKS" \ - -Dskip \ + -Dmaven.scaladoc.skip \ -Dmaven.scalastyle.skip="$SKIP_CHECKS" \ -pl aggregator -am > "$LOG_FILE" 2>&1 || { [[ "$LOG_FILE" != "/dev/tty" ]] && echo "$LOG_FILE:" && tail -20 "$LOG_FILE" || true @@ -303,5 +303,5 @@ time ( echo "Resuming from $joinShimBuildFrom build only using $BASE_VER" $MVN $FINAL_OP -rf $joinShimBuildFrom $MODULE_OPT $MVN_PROFILE_OPT $INCLUDED_BUILDVERS_OPT \ -Dbuildver="$BASE_VER" \ - -DskipTests -Dskip + -DskipTests -Dmaven.scaladoc.skip ) diff --git a/jenkins/databricks/build.sh b/jenkins/databricks/build.sh index 2f8e7686cab..8a0b25a0c95 100755 --- a/jenkins/databricks/build.sh +++ b/jenkins/databricks/build.sh @@ -150,7 +150,7 @@ $MVN_CMD -B -Ddatabricks -Dbuildver=$BUILDVER clean package -DskipTests $MVN_OPT if [[ "$WITH_DEFAULT_UPSTREAM_SHIM" != "0" ]]; then echo "Building the default Spark shim and creating a two-shim dist jar" UPSTREAM_BUILDVER=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=buildver -DforceStdout) - $MVN_CMD -B package -pl dist -am -DskipTests -Dskip $MVN_OPT \ + $MVN_CMD -B package -pl dist -am -DskipTests -Dmaven.scaladoc.skip $MVN_OPT \ -Dincluded_buildvers=$UPSTREAM_BUILDVER,$BUILDVER fi diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh index 5f3b33a108d..a13b5137af0 100755 --- a/jenkins/spark-premerge-build.sh +++ b/jenkins/spark-premerge-build.sh @@ -29,7 +29,7 
@@ fi CUDA_CLASSIFIER=${CUDA_CLASSIFIER:-'cuda11'} MVN_CMD="mvn -Dmaven.wagon.http.retryHandler.count=3" -MVN_BUILD_ARGS="-Drat.skip=true -Dskip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER" +MVN_BUILD_ARGS="-Drat.skip=true -Dmaven.scaladoc.skip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER" mvn_verify() { echo "Run mvn verify..." From c3d974052e38c60ddf57fbd1f4f77287c537f5fe Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 14 Nov 2023 03:01:04 -0800 Subject: [PATCH 19/28] robuster jar swapping logic Signed-off-by: Gera Shegalov --- aggregator/pom.xml | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/aggregator/pom.xml b/aggregator/pom.xml index 149a2a5ef24..8f27589e011 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -41,7 +41,8 @@ none **/* initialize - package + + initialize @@ -123,7 +124,7 @@ generate-build-info - generate-test-sources + none create-aggregator-for-downstream-if-content-changed @@ -133,32 +134,38 @@ + location="${project.build.outputDirectory}/../${project.build.finalName}-shaded.jar"/> - + location="${project.build.outputDirectory}/../${project.build.finalName}-${spark.version.classifier}.jar"/> + + Checking if need to recreate: ${aggJarForDownstream} - + + + + + + + resultproperty="diff.result"> + - + + Aggregator jar unchanged - Recreating final jar - - - + Aggregator jar changed, recreating final jar + + + From b4ec26ef8411a4ab5dde6d3be41c846bdf913540 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 14 Nov 2023 03:09:41 -0800 Subject: [PATCH 20/28] scala2.13 sync Signed-off-by: Gera Shegalov --- scala2.13/aggregator/pom.xml | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/scala2.13/aggregator/pom.xml b/scala2.13/aggregator/pom.xml index a932bee7069..3963cef9104 100644 --- a/scala2.13/aggregator/pom.xml +++ b/scala2.13/aggregator/pom.xml @@ -41,7 +41,8 @@ none **/* initialize - 
package + + initialize @@ -123,7 +124,7 @@ generate-build-info - generate-test-sources + none create-aggregator-for-downstream-if-content-changed @@ -133,32 +134,38 @@ + location="${project.build.outputDirectory}/../${project.build.finalName}-shaded.jar"/> - + location="${project.build.outputDirectory}/../${project.build.finalName}-${spark.version.classifier}.jar"/> + + Checking if need to recreate: ${aggJarForDownstream} - + + + + + + + resultproperty="diff.result"> + - + + Aggregator jar unchanged - Recreating final jar - - - + Aggregator jar changed, recreating final jar + + + From d92fc17362b579e5c534d90a5cba8c57b054af61 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 14 Nov 2023 16:07:31 -0800 Subject: [PATCH 21/28] Initialize dirs check shim service file befor overwriting Signed-off-by: Gera Shegalov --- pom.xml | 11 +++++++++++ scala2.13/pom.xml | 11 +++++++++++ scala2.13/sql-plugin/pom.xml | 16 ++++++++++------ sql-plugin/pom.xml | 16 ++++++++++------ 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/pom.xml b/pom.xml index 51a091d04e2..ee3a751ebdb 100644 --- a/pom.xml +++ b/pom.xml @@ -967,6 +967,17 @@ + + setup-dirs + initialize + run + + + + + + + generate-build-info generate-resources diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index ac0bac0a401..9533c508da5 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -967,6 +967,17 @@ + + setup-dirs + initialize + run + + + + + + + generate-build-info generate-resources diff --git a/scala2.13/sql-plugin/pom.xml b/scala2.13/sql-plugin/pom.xml index b8ddbef3313..1d07a39eaa1 100644 --- a/scala2.13/sql-plugin/pom.xml +++ b/scala2.13/sql-plugin/pom.xml @@ -176,17 +176,21 @@ + location="${servicesDir}/com.nvidia.spark.rapids.SparkShimServiceProvider"/> + + - + + + Skipping shim service file generation, already exists - - + Recreating shim service file + + diff --git a/sql-plugin/pom.xml b/sql-plugin/pom.xml index 39feb85a8d5..45fc181b90f 100644 --- a/sql-plugin/pom.xml +++ 
b/sql-plugin/pom.xml @@ -176,17 +176,21 @@ + location="${servicesDir}/com.nvidia.spark.rapids.SparkShimServiceProvider"/> + + - + + + Skipping shim service file generation, already exists - - + Recreating shim service file + + From 3f144ad490da5659e86e446a66219588296129fd Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 14 Nov 2023 18:18:36 -0800 Subject: [PATCH 22/28] Fix equals check, regen scala2.13 Signed-off-by: Gera Shegalov --- scala2.13/sql-plugin/pom.xml | 8 +++----- sql-plugin/pom.xml | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/scala2.13/sql-plugin/pom.xml b/scala2.13/sql-plugin/pom.xml index 1d07a39eaa1..67f3f91c30f 100644 --- a/scala2.13/sql-plugin/pom.xml +++ b/scala2.13/sql-plugin/pom.xml @@ -176,14 +176,12 @@ + value="${servicesDir}/com.nvidia.spark.rapids.SparkShimServiceProvider"/> - + - - - + Skipping shim service file generation, already exists diff --git a/sql-plugin/pom.xml b/sql-plugin/pom.xml index 45fc181b90f..9773cc91ba1 100644 --- a/sql-plugin/pom.xml +++ b/sql-plugin/pom.xml @@ -176,14 +176,12 @@ + value="${servicesDir}/com.nvidia.spark.rapids.SparkShimServiceProvider"/> - + - - - + Skipping shim service file generation, already exists From 6af04b508aa3c19cf2bed0d586548d9448e1598d Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 14 Nov 2023 19:42:32 -0800 Subject: [PATCH 23/28] use copy instead of move Signed-off-by: Gera Shegalov --- aggregator/pom.xml | 2 +- scala2.13/aggregator/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aggregator/pom.xml b/aggregator/pom.xml index 8f27589e011..695a97e3d88 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -165,7 +165,7 @@ Aggregator jar changed, recreating final jar - + diff --git a/scala2.13/aggregator/pom.xml b/scala2.13/aggregator/pom.xml index 3963cef9104..cad9c8ea3fe 100644 --- a/scala2.13/aggregator/pom.xml +++ b/scala2.13/aggregator/pom.xml @@ -165,7 +165,7 @@ Aggregator jar changed, recreating final jar - 
+ From db14701d07cb79710ef64384432af5c881e167ed Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 14 Nov 2023 22:06:28 -0800 Subject: [PATCH 24/28] undo compiler plugin change Signed-off-by: Gera Shegalov --- aggregator/pom.xml | 10 ++++++++++ pom.xml | 14 ++++++++++---- scala2.13/aggregator/pom.xml | 10 ++++++++++ scala2.13/pom.xml | 14 ++++++++++---- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/aggregator/pom.xml b/aggregator/pom.xml index 695a97e3d88..8f8b6da47fc 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -122,6 +122,16 @@ org.apache.maven.plugins maven-antrun-plugin + + init-dirs + initialize + run + + + + + + generate-build-info none diff --git a/pom.xml b/pom.xml index ee3a751ebdb..179227b1b05 100644 --- a/pom.xml +++ b/pom.xml @@ -1085,10 +1085,16 @@ This will force full Scala code rebuild in downstream modules. org.apache.maven.plugins maven-compiler-plugin 3.11.0 - - true - true - + + + default-compile + none + + + default-testCompile + none + + net.alchim31.maven diff --git a/scala2.13/aggregator/pom.xml b/scala2.13/aggregator/pom.xml index cad9c8ea3fe..4868d10d74e 100644 --- a/scala2.13/aggregator/pom.xml +++ b/scala2.13/aggregator/pom.xml @@ -122,6 +122,16 @@ org.apache.maven.plugins maven-antrun-plugin + + init-dirs + initialize + run + + + + + + generate-build-info none diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 9533c508da5..4922f481b2a 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -1085,10 +1085,16 @@ This will force full Scala code rebuild in downstream modules. 
org.apache.maven.plugins maven-compiler-plugin 3.11.0 - - true - true - + + + default-compile + none + + + default-testCompile + none + + net.alchim31.maven From f2051e319229ab82d626f7ca2eb5afa86a3a0f49 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 15 Nov 2023 14:23:39 -0800 Subject: [PATCH 25/28] rapids.jni.unpack.skip Signed-off-by: Gera Shegalov --- CONTRIBUTING.md | 10 ++++- dist/pom.xml | 2 + pom.xml | 34 +++++++-------- scala2.13/dist/pom.xml | 2 + scala2.13/pom.xml | 34 +++++++-------- tests/README.md | 94 ++++++++++++++++++++++++++++-------------- 6 files changed, 110 insertions(+), 66 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eb91c3bfe31..1cc52e5472a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -218,6 +218,12 @@ for a single Spark version Shim alone. To this end in a pre-production build you can set the Boolean property `dist.jar.compress` to `false`, its default value is `true`. +Furthermore, after the first build execution on a clean repository the spark-rapids-jni +SNAPSHOT dependency typically does not change until the next nightly CI build, or the next install +to the local Maven repo if you are working on a change to the native code. So you can save +significant time spent on repeatedly unpacking these dependencies by adding `-Drapids.jni.unpack.skip` +to the `dist` build command. 
+ The time saved is more significant if you are merely changing the `aggregator` module, or the `dist` module, or just incorporating changes from [spark-rapids-jni](https://github.com/NVIDIA/spark-rapids-jni/blob/branch-23.04/CONTRIBUTING.md#local-testing-of-cross-repo-contributions-cudf-spark-rapids-jni-and-spark-rapids) @@ -225,12 +231,12 @@ the `aggregator` module, or the `dist` module, or just incorporating changes fro For example, to quickly repackage `rapids-4-spark` after the initial `./build/buildall` you can iterate by invoking ```Bash -mvn package -pl dist -PnoSnapshots -Ddist.jar.compress=false +mvn package -pl dist -PnoSnapshots -Ddist.jar.compress=false -Drapids.jni.unpack.skip ``` or similarly ```Bash - ./build/buildall --rebuild-dist-only --option="-Ddist.jar.compress=false" + ./build/buildall --rebuild-dist-only --option="-Ddist.jar.compress=false -Drapids.jni.unpack.skip" ``` ## Code contributions diff --git a/dist/pom.xml b/dist/pom.xml index dd46404e33d..395e0debc71 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -45,6 +45,7 @@ ${project.build.directory}/${project.build.finalName}-${jni.classifier}.jar jar:file:${dist.jar.name}!/META-INF/maven/${project.groupId}/${project.artifactId}/pom.xml none + false @@ -447,6 +448,7 @@ self.log("... OK") unpack + ${rapids.jni.unpack.skip} diff --git a/pom.xml b/pom.xml index 179227b1b05..297492604de 100644 --- a/pom.xml +++ b/pom.xml @@ -1007,23 +1007,23 @@ This will force full Scala code rebuild in downstream modules. Generating new version info file - - - - - - - - - - - - - + + + + + + + + + + + + + diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml index a065880fcfb..d16eb881c3b 100644 --- a/scala2.13/dist/pom.xml +++ b/scala2.13/dist/pom.xml @@ -45,6 +45,7 @@ ${project.build.directory}/${project.build.finalName}-${jni.classifier}.jar jar:file:${dist.jar.name}!/META-INF/maven/${project.groupId}/${project.artifactId}/pom.xml none + false @@ -447,6 +448,7 @@ self.log("... 
OK") unpack + ${rapids.jni.unpack.skip} diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 4922f481b2a..fbc33b06cb5 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -1007,23 +1007,23 @@ This will force full Scala code rebuild in downstream modules. Generating new version info file - - - - - - - - - - - - - + + + + + + + + + + + + + diff --git a/tests/README.md b/tests/README.md index 483c1309ec1..7f0fe1df837 100644 --- a/tests/README.md +++ b/tests/README.md @@ -7,46 +7,80 @@ and the code is in the `com.nvidia.spark.rapids.tests.mortgage` package. ## Unit Tests -Unit tests exist in the [tests]() directory. This is unconventional and is done, so we can run the -tests on the final shaded version of the plugin. It also helps with how we collect code coverage. +Unit tests implemented using the ScalaTest framework reside in the [tests]() directory. This is +unconventional and is done, so we can run the tests on the close-to-final shaded single-shim version +of the plugin. It also helps with how we collect code coverage. -The `tests` module depends on the `aggregator` module which shades dependencies. When running the -tests via `mvn test`, make sure to run install command via `mvn install` for the aggregator jar to the -local maven repository. -The steps to run the unit tests: -```bash -cd -mvn clean install -cd tests -mvn test -``` +The `tests` module depends on the `aggregator` module which shades external dependencies and +aggregates them along with internal submodules into an artifact supporting a single Spark version. + +The minimum required Maven phase to run unit tests is `package`. Alternatively, you may run +`mvn install` and use `mvn test` for subsequent testing. However, to avoid dealing with stale jars +in the local Maven repo cache, we recommend to invoke `mvn package -pl tests -am ...` from the +`spark-rapids` root directory. 
Add `-f scala2.13` if you want to run against + +To run targeted Scala tests use + +`-DwildcardSuites=` + +Or easier, use a combination of + +`-Dsuffixes=` to restrict the test suites being run, +which corresponds to `-q` option in the +[ScalaTest runner](https://www.scalatest.org/user_guide/using_the_runner). + +and + +`-Dtests=`, to restrict tests run within test suites, +which corresponds to `-z` or `-t` options in the +[ScalaTest runner](https://www.scalatest.org/user_guide/using_the_runner). -To run targeted Scala tests append `-DwildcardSuites=` to the above command. - For more information about using scalatest with Maven please refer to the -[scalatest documentation](https://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin). - +[scalatest documentation](https://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin) +and the +[source code](https://github.com/scalatest/scalatest-maven-plugin/blob/383f396162b7654930758b76a0696d3aa2ce5686/src/main/java/org/scalatest/tools/maven/AbstractScalaTestMojo.java#L34). + + #### Running Unit Tests Against Specific Apache Spark Versions You can run the unit tests against different versions of Spark using the different profiles. The -default version runs against Spark 3.1.1, to run against a specific version use one of the following -profiles: - - `-Prelease311` (Spark 3.1.1) - - `-Prelease321` (Spark 3.2.1) - - `-Prelease322` (Spark 3.2.2) - - `-Prelease330` (Spark 3.3.0) - - `-Prelease340` (Spark 3.4.0) +default version runs against Spark 3.1.1, to run against a specific version use a buildver property: + +- `-Dbuildver=311` (Spark 3.1.1) +- `-Dbuildver=350` (Spark 3.5.0) + +etc Please refer to the [tests project POM](pom.xml) to see the list of test profiles supported. Apache Spark specific configurations can be passed in by setting the `SPARK_CONF` environment variable. 
-Examples: -- To run tests against Apache Spark 3.2.1, - `mvn -Prelease321 test` -- To pass Apache Spark configs `--conf spark.dynamicAllocation.enabled=false --conf spark.task.cpus=1` do something like. - `SPARK_CONF="spark.dynamicAllocation.enabled=false,spark.task.cpus=1" mvn ...` -- To run test ParquetWriterSuite in package com.nvidia.spark.rapids, issue `mvn test -DwildcardSuites="com.nvidia.spark.rapids.ParquetWriterSuite"` +Examples: + +To run all tests against Apache Spark 3.2.1, + +```bash +mvn package -pl tests -am -Dbuildver=321 +``` + +To pass Apache Spark configs `--conf spark.dynamicAllocation.enabled=false --conf spark.task.cpus=1` +do something like. + +```bash +SPARK_CONF="spark.dynamicAllocation.enabled=false,spark.task.cpus=1" mvn ... +``` + +To run all tests in `ParquetWriterSuite` in package com.nvidia.spark.rapids, issue + +```bash +mvn package -pl tests -am -DwildcardSuites="com.nvidia.spark.rapids.ParquetWriterSuite" +``` + +To run all AnsiCastOpSuite and CastOpSuite tests dealing with decimals using +Apache Spark 3.3.0 on Scala 2.13 artifacts, issue: + +```bash +mvn package -f scala2.13 -pl tests -am -Dbuildver=330 -Dsuffixes='.*CastOpSuite' -Dtests=decimal +``` ## Integration Tests From e5aecbc1ae2d940ff29a56390fed4bd515cdb818 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 15 Nov 2023 14:41:37 -0800 Subject: [PATCH 26/28] Apply suggestions from code review Co-authored-by: Jason Lowe --- tests/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/README.md b/tests/README.md index 7f0fe1df837..6d343960940 100644 --- a/tests/README.md +++ b/tests/README.md @@ -8,7 +8,7 @@ and the code is in the `com.nvidia.spark.rapids.tests.mortgage` package. ## Unit Tests Unit tests implemented using the ScalaTest framework reside in the [tests]() directory. 
This is -unconventional and is done, so we can run the tests on the close-to-final shaded single-shim version +unconventional and is done so we can run the tests on the close-to-final shaded single-shim version of the plugin. It also helps with how we collect code coverage. The `tests` module depends on the `aggregator` module which shades external dependencies and From 5efd1c299ef85e1eaedefd4d09bf313995660653 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 15 Nov 2023 14:45:26 -0800 Subject: [PATCH 27/28] reviews Signed-off-by: Gera Shegalov --- tests/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/README.md b/tests/README.md index 6d343960940..b53291b6839 100644 --- a/tests/README.md +++ b/tests/README.md @@ -17,7 +17,8 @@ aggregates them along with internal submodules into an artifact supporting a sin The minimum required Maven phase to run unit tests is `package`. Alternatively, you may run `mvn install` and use `mvn test` for subsequent testing. However, to avoid dealing with stale jars in the local Maven repo cache, we recommend to invoke `mvn package -pl tests -am ...` from the -`spark-rapids` root directory. Add `-f scala2.13` if you want to run against +`spark-rapids` root directory. Add `-f scala2.13` if you want to run unit tests against +Apache Spark dependencies based on Scala 2.13. 
To run targeted Scala tests use From 24ca3c7b674cb151f5e11190375f28e9c7128db1 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Thu, 16 Nov 2023 11:31:07 -0800 Subject: [PATCH 28/28] Fixes - binary-dedupe should not look outside parallel-world directory - when unpack is skipped we should restore jni from a staged dir Signed-off-by: Gera Shegalov --- dist/pom.xml | 17 +++++++++++++++-- dist/scripts/binary-dedupe.sh | 4 ++-- scala2.13/dist/pom.xml | 17 +++++++++++++++-- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/dist/pom.xml b/dist/pom.xml index 395e0debc71..a858d2865b5 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -324,6 +324,19 @@ + + copy-jni-and-ucx-classes + + process-resources + run + + + + + + + + verify @@ -456,14 +469,14 @@ self.log("... OK") spark-rapids-jni ${jni.classifier} META-INF/** - ${project.build.directory}/parallel-world + ${project.build.directory}/jni-deps true org.openucx jucx META-INF/** - ${project.build.directory}/parallel-world + ${project.build.directory}/jni-deps true diff --git a/dist/scripts/binary-dedupe.sh b/dist/scripts/binary-dedupe.sh index b28b1cfa69d..183e86b1524 100755 --- a/dist/scripts/binary-dedupe.sh +++ b/dist/scripts/binary-dedupe.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -157,7 +157,7 @@ mv "$SPARK3XX_COMMON_DIR" parallel-world/ # Determine the list of unshimmed class files UNSHIMMED_LIST_TXT=unshimmed-result.txt echo "$((++STEP))/ creating sorted list of unshimmed classes > $UNSHIMMED_LIST_TXT" -find . 
-name '*.class' -not -path './parallel-world/spark3*' | \ +find ./parallel-world -name '*.class' -not -path './parallel-world/spark3*' | \ cut -d/ -f 3- | sort > "$UNSHIMMED_LIST_TXT" function verify_same_sha_for_unshimmed() { diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml index d16eb881c3b..7e87dfe5f7c 100644 --- a/scala2.13/dist/pom.xml +++ b/scala2.13/dist/pom.xml @@ -324,6 +324,19 @@ + + copy-jni-and-ucx-classes + + process-resources + run + + + + + + + + verify @@ -456,14 +469,14 @@ self.log("... OK") spark-rapids-jni ${jni.classifier} META-INF/** - ${project.build.directory}/parallel-world + ${project.build.directory}/jni-deps true org.openucx jucx META-INF/** - ${project.build.directory}/parallel-world + ${project.build.directory}/jni-deps true