diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index 0e9d7899502..cecbe488961 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -27,7 +27,7 @@ env:
COMMON_MVN_FLAGS: >-
-Ddist.jar.compress=false
-DskipTests
- -Dskip
+ -Dmaven.scaladoc.skip
jobs:
get-shim-versions-from-dist:
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index eea7fe81387..1cc52e5472a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -218,6 +218,12 @@ for a single Spark version Shim alone.
To this end in a pre-production build you can set the Boolean property
`dist.jar.compress` to `false`, its default value is `true`.
+Furthermore, after the first build execution in a clean repository, the spark-rapids-jni
+SNAPSHOT dependency typically does not change until the next nightly CI build, or until the next
+install to the local Maven repo if you are working on a change to the native code. So you can save
+the significant time otherwise spent on repeatedly unpacking this dependency by adding
+`-Drapids.jni.unpack.skip` to the `dist` build command.
+
The time saved is more significant if you are merely changing
the `aggregator` module, or the `dist` module, or just incorporating changes from
[spark-rapids-jni](https://github.com/NVIDIA/spark-rapids-jni/blob/branch-23.04/CONTRIBUTING.md#local-testing-of-cross-repo-contributions-cudf-spark-rapids-jni-and-spark-rapids)
@@ -225,12 +231,12 @@ the `aggregator` module, or the `dist` module, or just incorporating changes fro
For example, to quickly repackage `rapids-4-spark` after the
initial `./build/buildall` you can iterate by invoking
```Bash
-mvn package -pl dist -PnoSnapshots -Ddist.jar.compress=false
+mvn package -pl dist -PnoSnapshots -Ddist.jar.compress=false -Drapids.jni.unpack.skip
```
or similarly
```Bash
- ./build/buildall --rebuild-dist-only --option="-Ddist.jar.compress=false"
+ ./build/buildall --rebuild-dist-only --option="-Ddist.jar.compress=false -Drapids.jni.unpack.skip"
```
## Code contributions
@@ -282,7 +288,7 @@ Before proceeding with importing spark-rapids into IDEA or switching to a differ
profile, execute the install phase with the corresponding `buildver`, e.g. for Spark 3.4.0:
```bash
- mvn clean install -Dbuildver=340 -Dskip -DskipTests
+ mvn clean install -Dbuildver=340 -Dmaven.scaladoc.skip -DskipTests
```
##### Importing the project
diff --git a/aggregator/pom.xml b/aggregator/pom.xml
index f2fc06a370f..8f8b6da47fc 100644
--- a/aggregator/pom.xml
+++ b/aggregator/pom.xml
@@ -39,6 +39,10 @@
com.nvidia.shaded.sparkfalsenone
+ **/*
+ initialize
+
+ initialize
@@ -73,7 +77,6 @@
maven-shade-plugintrue
- ${spark.version.classifier}org.slf4j:*
@@ -108,13 +111,78 @@
main-${spark.version.classifier}
- package
+ compileshade
+
+ org.apache.maven.plugins
+ maven-antrun-plugin
+
+
+ init-dirs
+ initialize
+ run
+
+
+
+
+
+
+
+ generate-build-info
+ none
+
+
+ create-aggregator-for-downstream-if-content-changed
+ run
+ process-classes
+
+
+
+
+
+
+
+ Checking if need to recreate: ${aggJarForDownstream}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Aggregator jar unchanged
+
+
+ Aggregator jar changed, recreating final jar
+
+
+
+
+
+
+
+
+
+ org.jacocojacoco-maven-plugin
diff --git a/build/buildall b/build/buildall
index a700acad539..356efa2d46d 100755
--- a/build/buildall
+++ b/build/buildall
@@ -262,7 +262,7 @@ function build_single_shim() {
-DskipTests \
-Dbuildver="$BUILD_VER" \
-Drat.skip="$SKIP_CHECKS" \
- -Dskip \
+ -Dmaven.scaladoc.skip \
-Dmaven.scalastyle.skip="$SKIP_CHECKS" \
-pl aggregator -am > "$LOG_FILE" 2>&1 || {
[[ "$LOG_FILE" != "/dev/tty" ]] && echo "$LOG_FILE:" && tail -20 "$LOG_FILE" || true
@@ -303,5 +303,5 @@ time (
echo "Resuming from $joinShimBuildFrom build only using $BASE_VER"
$MVN $FINAL_OP -rf $joinShimBuildFrom $MODULE_OPT $MVN_PROFILE_OPT $INCLUDED_BUILDVERS_OPT \
-Dbuildver="$BASE_VER" \
- -DskipTests -Dskip
+ -DskipTests -Dmaven.scaladoc.skip
)
diff --git a/dist/pom.xml b/dist/pom.xml
index dd46404e33d..a858d2865b5 100644
--- a/dist/pom.xml
+++ b/dist/pom.xml
@@ -45,6 +45,7 @@
${project.build.directory}/${project.build.finalName}-${jni.classifier}.jarjar:file:${dist.jar.name}!/META-INF/maven/${project.groupId}/${project.artifactId}/pom.xmlnone
+ false
@@ -323,6 +324,19 @@
+
+ copy-jni-and-ucx-classes
+
+ process-resources
+ run
+
+
+
+
+
+
+
+ verify
@@ -447,6 +461,7 @@ self.log("... OK")
unpack
+ ${rapids.jni.unpack.skip}
@@ -454,14 +469,14 @@ self.log("... OK")
spark-rapids-jni${jni.classifier}META-INF/**
- ${project.build.directory}/parallel-world
+ ${project.build.directory}/jni-depstrueorg.openucxjucxMETA-INF/**
- ${project.build.directory}/parallel-world
+ ${project.build.directory}/jni-depstrue
diff --git a/dist/scripts/binary-dedupe.sh b/dist/scripts/binary-dedupe.sh
index b28b1cfa69d..183e86b1524 100755
--- a/dist/scripts/binary-dedupe.sh
+++ b/dist/scripts/binary-dedupe.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -157,7 +157,7 @@ mv "$SPARK3XX_COMMON_DIR" parallel-world/
# Determine the list of unshimmed class files
UNSHIMMED_LIST_TXT=unshimmed-result.txt
echo "$((++STEP))/ creating sorted list of unshimmed classes > $UNSHIMMED_LIST_TXT"
-find . -name '*.class' -not -path './parallel-world/spark3*' | \
+find ./parallel-world -name '*.class' -not -path './parallel-world/spark3*' | \
cut -d/ -f 3- | sort > "$UNSHIMMED_LIST_TXT"
function verify_same_sha_for_unshimmed() {
diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml
index c992b073eae..6209d0b152a 100644
--- a/integration_tests/src/assembly/bin.xml
+++ b/integration_tests/src/assembly/bin.xml
@@ -47,7 +47,7 @@
integration_tests
- ${project.build.directory}/extra-resources/rapids4spark-version-info.properties
+ ${project.build.outputDirectory}/rapids4spark-version-info.propertiesintegration_tests
diff --git a/jenkins/databricks/build.sh b/jenkins/databricks/build.sh
index 2f8e7686cab..8a0b25a0c95 100755
--- a/jenkins/databricks/build.sh
+++ b/jenkins/databricks/build.sh
@@ -150,7 +150,7 @@ $MVN_CMD -B -Ddatabricks -Dbuildver=$BUILDVER clean package -DskipTests $MVN_OPT
if [[ "$WITH_DEFAULT_UPSTREAM_SHIM" != "0" ]]; then
echo "Building the default Spark shim and creating a two-shim dist jar"
UPSTREAM_BUILDVER=$($MVN_CMD help:evaluate -q -pl dist -Dexpression=buildver -DforceStdout)
- $MVN_CMD -B package -pl dist -am -DskipTests -Dskip $MVN_OPT \
+ $MVN_CMD -B package -pl dist -am -DskipTests -Dmaven.scaladoc.skip $MVN_OPT \
-Dincluded_buildvers=$UPSTREAM_BUILDVER,$BUILDVER
fi
diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh
index 5f3b33a108d..a13b5137af0 100755
--- a/jenkins/spark-premerge-build.sh
+++ b/jenkins/spark-premerge-build.sh
@@ -29,7 +29,7 @@ fi
CUDA_CLASSIFIER=${CUDA_CLASSIFIER:-'cuda11'}
MVN_CMD="mvn -Dmaven.wagon.http.retryHandler.count=3"
-MVN_BUILD_ARGS="-Drat.skip=true -Dskip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER"
+MVN_BUILD_ARGS="-Drat.skip=true -Dmaven.scaladoc.skip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER"
mvn_verify() {
echo "Run mvn verify..."
diff --git a/pom.xml b/pom.xml
index afb519ffc03..297492604de 100644
--- a/pom.xml
+++ b/pom.xml
@@ -819,6 +819,7 @@
falseinstall${spark.rapids.source.basedir}/.bloop
+ ${project.build.outputDirectory}/rapids4spark-version-info.properties
@@ -966,30 +967,64 @@
+
+ setup-dirs
+ initialize
+ run
+
+
+
+
+
+
+ generate-build-infogenerate-resources
-
-
-
-
-
-
+
+
+
-
-
-
-
-
-
-
+
+
+Comparing git revisions:
+ previous=${saved.build-info.revision}
+ current=${git.head.revision}
+
+
+
+
+
+
+Git revisions unchanged: skipping version info file generation.
+Delete ${build.info.path} or mvn clean if regeneration desired.
+This will force full Scala code rebuild in downstream modules.
+
+
+
+ Generating new version info file
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1049,6 +1084,7 @@
org.apache.maven.pluginsmaven-compiler-plugin
+ 3.11.0default-compile
@@ -1118,8 +1154,8 @@
-Xfatal-warnings-Wconf:cat=lint-adapted-args:e
- -Xsource:2.13
+ -Xsource:2.13-Ywarn-unused:locals,patvars,privates-Wconf:cat=deprecation:wv,any:e-Wconf:cat=scaladoc:wv
diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml
index a065880fcfb..7e87dfe5f7c 100644
--- a/scala2.13/dist/pom.xml
+++ b/scala2.13/dist/pom.xml
@@ -45,6 +45,7 @@
${project.build.directory}/${project.build.finalName}-${jni.classifier}.jarjar:file:${dist.jar.name}!/META-INF/maven/${project.groupId}/${project.artifactId}/pom.xmlnone
+ false
@@ -323,6 +324,19 @@
+
+ copy-jni-and-ucx-classes
+
+ process-resources
+ run
+
+
+
+
+
+
+
+ verify
@@ -447,6 +461,7 @@ self.log("... OK")
unpack
+ ${rapids.jni.unpack.skip}
@@ -454,14 +469,14 @@ self.log("... OK")
spark-rapids-jni${jni.classifier}META-INF/**
- ${project.build.directory}/parallel-world
+ ${project.build.directory}/jni-depstrueorg.openucxjucxMETA-INF/**
- ${project.build.directory}/parallel-world
+ ${project.build.directory}/jni-depstrue
diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml
index 629692d6e65..fbc33b06cb5 100644
--- a/scala2.13/pom.xml
+++ b/scala2.13/pom.xml
@@ -819,6 +819,7 @@
falseinstall${spark.rapids.source.basedir}/.bloop
+ ${project.build.outputDirectory}/rapids4spark-version-info.properties
@@ -966,30 +967,64 @@
+
+ setup-dirs
+ initialize
+ run
+
+
+
+
+
+
+ generate-build-infogenerate-resources
-
-
-
-
-
-
+
+
+
-
-
-
-
-
-
-
+
+
+Comparing git revisions:
+ previous=${saved.build-info.revision}
+ current=${git.head.revision}
+
+
+
+
+
+
+Git revisions unchanged: skipping version info file generation.
+Delete ${build.info.path} or mvn clean if regeneration desired.
+This will force full Scala code rebuild in downstream modules.
+
+
+
+ Generating new version info file
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1049,6 +1084,7 @@
org.apache.maven.pluginsmaven-compiler-plugin
+ 3.11.0default-compile
@@ -1118,8 +1154,8 @@
-Xfatal-warnings
-->
-Wconf:cat=lint-adapted-args:e
- -Xsource:2.13
+ -Xsource:2.13-Ywarn-unused:locals,patvars,privates-Wconf:cat=deprecation:wv,any:e-Wconf:cat=scaladoc:wv
diff --git a/scala2.13/sql-plugin/pom.xml b/scala2.13/sql-plugin/pom.xml
index 02090fb5e7e..67f3f91c30f 100644
--- a/scala2.13/sql-plugin/pom.xml
+++ b/scala2.13/sql-plugin/pom.xml
@@ -154,7 +154,7 @@
@@ -171,13 +171,27 @@
run
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+ Skipping shim service file generation, already exists
+
+
+ Recreating shim service file
+
+
+
+
+
diff --git a/sql-plugin/pom.xml b/sql-plugin/pom.xml
index 9d752b57f8d..9773cc91ba1 100644
--- a/sql-plugin/pom.xml
+++ b/sql-plugin/pom.xml
@@ -154,7 +154,7 @@
@@ -171,13 +171,27 @@
run
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+ Skipping shim service file generation, already exists
+
+
+ Recreating shim service file
+
+
+
+
+
diff --git a/tests/README.md b/tests/README.md
index 483c1309ec1..b53291b6839 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -7,46 +7,81 @@ and the code is in the `com.nvidia.spark.rapids.tests.mortgage` package.
## Unit Tests
-Unit tests exist in the [tests]() directory. This is unconventional and is done, so we can run the
-tests on the final shaded version of the plugin. It also helps with how we collect code coverage.
+Unit tests implemented using the ScalaTest framework reside in the [tests]() directory. This is
+unconventional and is done so we can run the tests on the close-to-final shaded single-shim version
+of the plugin. It also helps with how we collect code coverage.
-The `tests` module depends on the `aggregator` module which shades dependencies. When running the
-tests via `mvn test`, make sure to run install command via `mvn install` for the aggregator jar to the
-local maven repository.
-The steps to run the unit tests:
-```bash
-cd
-mvn clean install
-cd tests
-mvn test
-```
+The `tests` module depends on the `aggregator` module, which shades external dependencies and
+aggregates them along with internal submodules into an artifact supporting a single Spark version.
+
+The minimum required Maven phase to run unit tests is `package`. Alternatively, you may run
+`mvn install` and use `mvn test` for subsequent testing. However, to avoid dealing with stale jars
+in the local Maven repo cache, we recommend invoking `mvn package -pl tests -am ...` from the
+`spark-rapids` root directory. Add `-f scala2.13` if you want to run unit tests against
+Apache Spark dependencies based on Scala 2.13.
+
+To run targeted Scala tests use
+
+`-DwildcardSuites=<comma-separated list of fully qualified test suite names>`
+
+Or, more conveniently, use a combination of
+
+`-Dsuffixes=<regular expression matching test suite name suffixes>` to restrict the test suites
+being run, which corresponds to the `-q` option of the
+[ScalaTest runner](https://www.scalatest.org/user_guide/using_the_runner),
+
+and
+
+`-Dtests=<test name substrings or full test names>` to restrict which tests run within the
+selected suites, which corresponds to the `-z` or `-t` options of the
+[ScalaTest runner](https://www.scalatest.org/user_guide/using_the_runner).
+See the examples at the end of this section.
-To run targeted Scala tests append `-DwildcardSuites=` to the above command.
-
For more information about using scalatest with Maven please refer to the
-[scalatest documentation](https://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin).
-
+[scalatest documentation](https://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin)
+and the
+[source code](https://github.com/scalatest/scalatest-maven-plugin/blob/383f396162b7654930758b76a0696d3aa2ce5686/src/main/java/org/scalatest/tools/maven/AbstractScalaTestMojo.java#L34).
+
+
#### Running Unit Tests Against Specific Apache Spark Versions
You can run the unit tests against different versions of Spark using the different profiles. The
-default version runs against Spark 3.1.1, to run against a specific version use one of the following
-profiles:
- - `-Prelease311` (Spark 3.1.1)
- - `-Prelease321` (Spark 3.2.1)
- - `-Prelease322` (Spark 3.2.2)
- - `-Prelease330` (Spark 3.3.0)
- - `-Prelease340` (Spark 3.4.0)
+default version runs against Spark 3.1.1; to run against a specific version, use the `buildver` property:
+
+- `-Dbuildver=311` (Spark 3.1.1)
+- `-Dbuildver=350` (Spark 3.5.0)
+
+etc.
Please refer to the [tests project POM](pom.xml) to see the list of test profiles supported.
Apache Spark specific configurations can be passed in by setting the `SPARK_CONF` environment
variable.
-Examples:
-- To run tests against Apache Spark 3.2.1,
- `mvn -Prelease321 test`
-- To pass Apache Spark configs `--conf spark.dynamicAllocation.enabled=false --conf spark.task.cpus=1` do something like.
- `SPARK_CONF="spark.dynamicAllocation.enabled=false,spark.task.cpus=1" mvn ...`
-- To run test ParquetWriterSuite in package com.nvidia.spark.rapids, issue `mvn test -DwildcardSuites="com.nvidia.spark.rapids.ParquetWriterSuite"`
+Examples:
+
+To run all tests against Apache Spark 3.2.1, issue:
+
+```bash
+mvn package -pl tests -am -Dbuildver=321
+```
+
+To pass Apache Spark configs `--conf spark.dynamicAllocation.enabled=false --conf spark.task.cpus=1`
+do something like this:
+
+```bash
+SPARK_CONF="spark.dynamicAllocation.enabled=false,spark.task.cpus=1" mvn ...
+```
+
+To run all tests in `ParquetWriterSuite` in the package `com.nvidia.spark.rapids`, issue:
+
+```bash
+mvn package -pl tests -am -DwildcardSuites="com.nvidia.spark.rapids.ParquetWriterSuite"
+```
+
+To run all `AnsiCastOpSuite` and `CastOpSuite` tests dealing with decimals using
+Apache Spark 3.3.0 and the Scala 2.13 artifacts, issue:
+
+```bash
+mvn package -pl tests -am -f scala2.13 -Dbuildver=330 -Dsuffixes='.*CastOpSuite' -Dtests=decimal
+```
## Integration Tests