From 2f1fae20ae601da27befd19a7f7de70dc979df8b Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 7 Nov 2023 16:17:42 +0800 Subject: [PATCH 01/31] Detect multiple jars on the classpath when init plugin Signed-off-by: Haoyang Li --- .../scala/com/nvidia/spark/rapids/Plugin.scala | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 6520ff4c1b7..7e91edaa9f7 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -111,6 +111,16 @@ object RapidsPluginUtils extends Logging { } } + def detectMultiplePluginJars(): Unit = { + val cl = classOf[com.nvidia.spark.rapids.SparkShimServiceProvider].getClassLoader + val rapidsJarURLs = cl.getResources("rapids4spark-version-info.properties").asScala.toList + if (rapidsJarURLs.size > 1) { + val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") + throw new RuntimeException(s"Multiple rapids4spark jars found in the classpath: " + + s"$rapidsJars. Please make sure there is only one rapids4spark jar in the classpath.") + } + } + // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need // to handle. def estimateCoresOnExec(conf: SparkConf): Int = { @@ -306,6 +316,7 @@ class RapidsDriverPlugin extends DriverPlugin with Logging { override def init( sc: SparkContext, pluginContext: PluginContext): java.util.Map[String, String] = { + RapidsPluginUtils.detectMultiplePluginJars() val sparkConf = pluginContext.conf RapidsPluginUtils.fixupConfigsOnDriver(sparkConf) val conf = new RapidsConf(sparkConf) @@ -352,6 +363,9 @@ class RapidsExecutorPlugin extends ExecutorPlugin with Logging { pluginContext: PluginContext, extraConf: java.util.Map[String, String]): Unit = { try { + // Fail if there are multiple plugin jars in the classpath. + RapidsPluginUtils.detectMultiplePluginJars() + if (Cuda.getComputeCapabilityMajor < 6) { throw new RuntimeException(s"GPU compute capability ${Cuda.getComputeCapabilityMajor}" + " is unsupported, requires 6.0+") From 0c5df62a4891b6e6e49d3f1a3dc493a12bec76b5 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 7 Nov 2023 16:39:16 +0800 Subject: [PATCH 02/31] clean up Signed-off-by: Haoyang Li --- .../src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 7e91edaa9f7..69fce6cd710 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -112,8 +112,9 @@ object RapidsPluginUtils extends Logging { } def detectMultiplePluginJars(): Unit = { - val cl = classOf[com.nvidia.spark.rapids.SparkShimServiceProvider].getClassLoader - val rapidsJarURLs = cl.getResources("rapids4spark-version-info.properties").asScala.toList + val classloader = classOf[com.nvidia.spark.rapids.SparkShimServiceProvider].getClassLoader + val rapidsJarURLs = classloader.getResources("rapids4spark-version-info.properties") + .asScala.toList if (rapidsJarURLs.size > 1) { val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") throw new RuntimeException(s"Multiple rapids4spark jars found in the classpath: " + From 742b50847355378625ba3d0a6dc1dd09205e966f Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Wed, 8 Nov 2023 13:30:04 +0800 Subject: [PATCH 03/31] Apply suggestions from code review Co-authored-by: Gera Shegalov --- .../src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 69fce6cd710..020b7c072d0 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -112,8 +112,8 @@ object RapidsPluginUtils extends Logging { } def detectMultiplePluginJars(): Unit = { - val classloader = classOf[com.nvidia.spark.rapids.SparkShimServiceProvider].getClassLoader - val rapidsJarURLs = classloader.getResources("rapids4spark-version-info.properties") + val classloader = ShimLoader.getShimClassLoader() + val rapidsJarURLs = classloader.getResources(PLUGIN_PROPS_FILENAME) .asScala.toList if (rapidsJarURLs.size > 1) { val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") From 47ef387d4b59df8e5e0c8c3b0cfc2cf4c51452f7 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Wed, 8 Nov 2023 16:00:58 +0800 Subject: [PATCH 04/31] print version info and also check jni/cudf Signed-off-by: Haoyang Li --- .../com/nvidia/spark/rapids/Plugin.scala | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 69fce6cd710..1fa7bad310b 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -113,13 +113,30 @@ object RapidsPluginUtils extends Logging { def detectMultiplePluginJars(): Unit = { val classloader = classOf[com.nvidia.spark.rapids.SparkShimServiceProvider].getClassLoader - val rapidsJarURLs = classloader.getResources("rapids4spark-version-info.properties") + val rapidsJarURLs = classloader.getResources(PLUGIN_PROPS_FILENAME) .asScala.toList - if (rapidsJarURLs.size > 1) { - val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") - throw new RuntimeException(s"Multiple rapids4spark jars found in the classpath: " + - s"$rapidsJars. Please make sure there is only one rapids4spark jar in the classpath.") - } + lazy val rapidsJars = rapidsJarURLs.map { + url => scala.io.Source.fromInputStream(url.openStream()).mkString("") + }.mkString(",") + require(rapidsJarURLs.size <= 1, + s"Multiple rapids4spark jars found in the classpath, please make sure there is only " + + s"one rapids4spark jar in the classpath. Version info: \n$rapidsJars") + val cudfJarURLs = classloader.getResources(CUDF_PROPS_FILENAME) + .asScala.toList + lazy val cudfJars = cudfJarURLs.map { + url => scala.io.Source.fromInputStream(url.openStream()).mkString("") + }.mkString(",") + require(cudfJarURLs.size <= 1, + s"Multiple cudf jars found in the classpath, please make sure there is only " + + s"one cudf jar in the classpath. Version info: \n$cudfJars") + val jniJarURLs = classloader.getResources(JNI_PROPS_FILENAME) + .asScala.toList + lazy val jniJars = jniJarURLs.map { + url => scala.io.Source.fromInputStream(url.openStream()).mkString("") + }.mkString(",") + require(jniJarURLs.size <= 1, + s"Multiple spark-rapids-jni jars found in the classpath, please make sure there is only " + + s"one spark-rapids-jni jar in the classpath. Version info: \n$jniJars") } // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need From e29e37d41d64f9bb424ab36830cd3597b2f23459 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Wed, 8 Nov 2023 16:37:54 +0800 Subject: [PATCH 05/31] add config for allowing multiple jars Signed-off-by: Haoyang Li --- .../main/scala/com/nvidia/spark/rapids/Plugin.scala | 11 ++++++++--- .../scala/com/nvidia/spark/rapids/RapidsConf.scala | 10 ++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index e39484dd99d..16e6be7dd2c 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -334,10 +334,12 @@ class RapidsDriverPlugin extends DriverPlugin with Logging { override def init( sc: SparkContext, pluginContext: PluginContext): java.util.Map[String, String] = { - RapidsPluginUtils.detectMultiplePluginJars() val sparkConf = pluginContext.conf RapidsPluginUtils.fixupConfigsOnDriver(sparkConf) val conf = new RapidsConf(sparkConf) + if (!conf.allowMultipleJars) { + RapidsPluginUtils.detectMultiplePluginJars() + } RapidsPluginUtils.logPluginMode(conf) GpuCoreDumpHandler.driverInit(sc, conf) @@ -381,8 +383,6 @@ class RapidsExecutorPlugin extends ExecutorPlugin with Logging { pluginContext: PluginContext, extraConf: java.util.Map[String, String]): Unit = { try { - // Fail if there are multiple plugin jars in the classpath. - RapidsPluginUtils.detectMultiplePluginJars() if (Cuda.getComputeCapabilityMajor < 6) { throw new RuntimeException(s"GPU compute capability ${Cuda.getComputeCapabilityMajor}" + @@ -395,6 +395,11 @@ class RapidsExecutorPlugin extends ExecutorPlugin with Logging { val numCores = RapidsPluginUtils.estimateCoresOnExec(sparkConf) val conf = new RapidsConf(extraConf.asScala.toMap) + if (!conf.allowMultipleJars) { + // Fail if there are multiple plugin jars in the classpath. + RapidsPluginUtils.detectMultiplePluginJars() + } + // Compare if the cudf version mentioned in the classpath is equal to the version which // plugin expects. If there is a version mismatch, throw error. This check can be disabled // by setting this config spark.rapids.cudfVersionOverride=true diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 82fb5cdb4b9..119107d1c1c 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -1839,6 +1839,14 @@ object RapidsConf { .booleanConf .createWithDefault(false) + val ALLOW_MULTIPLE_JARS = conf("spark.rapids.sql.allowMultipleJars") + .internal() + .startupOnly() + .doc("Allow multiple rapids-4-spark, spark-rapids-jni, and cudf jars on the classpath. " + + "Spark will take the first one it finds, so the version may not be expected. ") + .booleanConf + .createWithDefault(false) + val ALLOW_DISABLE_ENTIRE_PLAN = conf("spark.rapids.allowDisableEntirePlan") .internal() .doc("The plugin has the ability to detect possibe incompatibility with some specific " + @@ -2634,6 +2642,8 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val cudfVersionOverride: Boolean = get(CUDF_VERSION_OVERRIDE) + lazy val allowMultipleJars: Boolean = get(ALLOW_MULTIPLE_JARS) + lazy val allowDisableEntirePlan: Boolean = get(ALLOW_DISABLE_ENTIRE_PLAN) lazy val useArrowCopyOptimization: Boolean = get(USE_ARROW_OPT) From 0441f53b5204fc9b8d42a27e15294399b93a41ef Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Wed, 8 Nov 2023 17:32:27 +0800 Subject: [PATCH 06/31] keep jar path in error messages Signed-off-by: Haoyang Li --- .../com/nvidia/spark/rapids/Plugin.scala | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 16e6be7dd2c..c15fa0a47e6 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -115,28 +115,31 @@ object RapidsPluginUtils extends Logging { val classloader = ShimLoader.getShimClassLoader() val rapidsJarURLs = classloader.getResources(PLUGIN_PROPS_FILENAME) .asScala.toList - lazy val rapidsJars = rapidsJarURLs.map { + lazy val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") + lazy val rapidsJarsVers = rapidsJarURLs.map { url => scala.io.Source.fromInputStream(url.openStream()).mkString("") }.mkString(",") require(rapidsJarURLs.size <= 1, - s"Multiple rapids4spark jars found in the classpath, please make sure there is only " + - s"one rapids4spark jar in the classpath. Version info: \n$rapidsJars") + s"Multiple rapids4spark jars found in the classpath: $rapidsJars, please make sure " + + s"there is only one rapids4spark jar in the classpath. Version info: \n$rapidsJarsVers") val cudfJarURLs = classloader.getResources(CUDF_PROPS_FILENAME) .asScala.toList - lazy val cudfJars = cudfJarURLs.map { + lazy val cudfJars = cudfJarURLs.map(_.toString.split("!").head).mkString(",") + lazy val cudfJarsVers = cudfJarURLs.map { url => scala.io.Source.fromInputStream(url.openStream()).mkString("") }.mkString(",") require(cudfJarURLs.size <= 1, - s"Multiple cudf jars found in the classpath, please make sure there is only " + - s"one cudf jar in the classpath. Version info: \n$cudfJars") + s"Multiple cudf jars found in the classpath: $cudfJars, please make sure there is only " + + s"one cudf jar in the classpath. Version info: \n$cudfJarsVers") val jniJarURLs = classloader.getResources(JNI_PROPS_FILENAME) .asScala.toList - lazy val jniJars = jniJarURLs.map { + lazy val jniJars = jniJarURLs.map(_.toString.split("!").head).mkString(",") + lazy val jniJarsVers = jniJarURLs.map { url => scala.io.Source.fromInputStream(url.openStream()).mkString("") }.mkString(",") require(jniJarURLs.size <= 1, - s"Multiple spark-rapids-jni jars found in the classpath, please make sure there is only " + - s"one spark-rapids-jni jar in the classpath. Version info: \n$jniJars") + s"Multiple spark-rapids-jni jars found in the classpath: $jniJars, please make sure " + + s"there is only one spark-rapids-jni jar in the classpath. Version info: \n$jniJarsVers") } // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need From ded091e8777a137b41964c6aad4953421f8ca7ad Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Thu, 9 Nov 2023 10:59:18 +0800 Subject: [PATCH 07/31] address comments Signed-off-by: Haoyang Li --- .../com/nvidia/spark/rapids/Plugin.scala | 52 ++++++++----------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index c15fa0a47e6..72a6bb522a0 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -111,35 +111,29 @@ object RapidsPluginUtils extends Logging { } } - def detectMultiplePluginJars(): Unit = { + private def detectMultipleJar(propName: String, jarName: String, + complainFun: (Boolean, String) => Unit): Unit = { val classloader = ShimLoader.getShimClassLoader() - val rapidsJarURLs = classloader.getResources(PLUGIN_PROPS_FILENAME) - .asScala.toList + val rapidsJarURLs = classloader.getResources(propName).asScala.toList lazy val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") lazy val rapidsJarsVers = rapidsJarURLs.map { url => scala.io.Source.fromInputStream(url.openStream()).mkString("") }.mkString(",") - require(rapidsJarURLs.size <= 1, - s"Multiple rapids4spark jars found in the classpath: $rapidsJars, please make sure " + - s"there is only one rapids4spark jar in the classpath. Version info: \n$rapidsJarsVers") - val cudfJarURLs = classloader.getResources(CUDF_PROPS_FILENAME) - .asScala.toList - lazy val cudfJars = cudfJarURLs.map(_.toString.split("!").head).mkString(",") - lazy val cudfJarsVers = cudfJarURLs.map { - url => scala.io.Source.fromInputStream(url.openStream()).mkString("") - }.mkString(",") - require(cudfJarURLs.size <= 1, - s"Multiple cudf jars found in the classpath: $cudfJars, please make sure there is only " + - s"one cudf jar in the classpath. Version info: \n$cudfJarsVers") - val jniJarURLs = classloader.getResources(JNI_PROPS_FILENAME) - .asScala.toList - lazy val jniJars = jniJarURLs.map(_.toString.split("!").head).mkString(",") - lazy val jniJarsVers = jniJarURLs.map { - url => scala.io.Source.fromInputStream(url.openStream()).mkString("") - }.mkString(",") - require(jniJarURLs.size <= 1, - s"Multiple spark-rapids-jni jars found in the classpath: $jniJars, please make sure " + - s"there is only one spark-rapids-jni jar in the classpath. Version info: \n$jniJarsVers") + complainFun(rapidsJarURLs.size <= 1, + s"Multiple $jarName jars found in the classpath: $rapidsJars, please make sure there " + + s"is only one $jarName jar in the classpath. If it is impossible to fix the classpath " + + s"you can suppress the error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key} to true" + + s"Version info: \n$rapidsJarsVers") + } + + def detectMultipleJars(conf: RapidsConf): Unit = { + val complainFun: (Boolean, String) => Unit = conf.allowMultipleJars match { + case true => (request: Boolean, msg: String) => if (!request) logWarning(msg) + case false => (request: Boolean, msg: String) => require(request, msg) + } + detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids4spark", complainFun) + detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", complainFun) + detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", complainFun) } // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need @@ -340,9 +334,7 @@ class RapidsDriverPlugin extends DriverPlugin with Logging { val sparkConf = pluginContext.conf RapidsPluginUtils.fixupConfigsOnDriver(sparkConf) val conf = new RapidsConf(sparkConf) - if (!conf.allowMultipleJars) { - RapidsPluginUtils.detectMultiplePluginJars() - } + RapidsPluginUtils.detectMultipleJars(conf) RapidsPluginUtils.logPluginMode(conf) GpuCoreDumpHandler.driverInit(sc, conf) @@ -398,10 +390,8 @@ class RapidsExecutorPlugin extends ExecutorPlugin with Logging { val numCores = RapidsPluginUtils.estimateCoresOnExec(sparkConf) val conf = new RapidsConf(extraConf.asScala.toMap) - if (!conf.allowMultipleJars) { - // Fail if there are multiple plugin jars in the classpath. - RapidsPluginUtils.detectMultiplePluginJars() - } + // Fail if there are multiple plugin jars in the classpath. + RapidsPluginUtils.detectMultipleJars(conf) // Compare if the cudf version mentioned in the classpath is equal to the version which // plugin expects. If there is a version mismatch, throw error. This check can be disabled From 42fc474b453d0de593f3a2e2f8011bf2b27ba259 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Thu, 9 Nov 2023 21:10:26 +0800 Subject: [PATCH 08/31] Update sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala Co-authored-by: Gera Shegalov --- sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 72a6bb522a0..4d834d9a6ff 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -122,7 +122,7 @@ object RapidsPluginUtils extends Logging { complainFun(rapidsJarURLs.size <= 1, s"Multiple $jarName jars found in the classpath: $rapidsJars, please make sure there " + s"is only one $jarName jar in the classpath. If it is impossible to fix the classpath " + - s"you can suppress the error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key} to true" + + s"you can suppress the error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key} to true. " + s"Version info: \n$rapidsJarsVers") } From 6bbedfb94eec4fb7e3f657ff3382325c74cbbca5 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Thu, 9 Nov 2023 21:48:37 +0800 Subject: [PATCH 09/31] Update sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala Co-authored-by: Gera Shegalov --- sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 4d834d9a6ff..a37c617a401 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -114,7 +114,7 @@ object RapidsPluginUtils extends Logging { private def detectMultipleJar(propName: String, jarName: String, complainFun: (Boolean, String) => Unit): Unit = { val classloader = ShimLoader.getShimClassLoader() - val rapidsJarURLs = classloader.getResources(propName).asScala.toList + val rapidsJarURLs = classloader.getResources(propName).asScala.toSet lazy val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") lazy val rapidsJarsVers = rapidsJarURLs.map { url => scala.io.Source.fromInputStream(url.openStream()).mkString("") From d9264d8da22c0c1a4ba0a05cdd4a42748eb3d1c3 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Thu, 9 Nov 2023 10:59:18 +0800 Subject: [PATCH 10/31] address comments Signed-off-by: Haoyang Li --- dist/scripts/check-shims-revisions.sh | 9 ++++++--- dist/unshimmed-common-from-spark311.txt | 2 +- integration_tests/src/assembly/bin.xml | 11 +++++++---- jenkins/spark-tests.sh | 6 +++--- pom.xml | 3 ++- scala2.13/pom.xml | 2 +- .../main/scala/com/nvidia/spark/rapids/Plugin.scala | 2 +- 7 files changed, 21 insertions(+), 14 deletions(-) diff --git a/dist/scripts/check-shims-revisions.sh b/dist/scripts/check-shims-revisions.sh index 8a9e422b2e1..6e97e7635c3 100755 --- a/dist/scripts/check-shims-revisions.sh +++ b/dist/scripts/check-shims-revisions.sh @@ -1,7 +1,7 @@ #!/bin/bash # -# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,12 +26,15 @@ function check-shims-revisions() { parallel_dir=${PWD}/parallel-world pre_revision="" pre_shim_version_path="" - + IFS="," for shim in ${included_buildvers}; do # trim shim=$(echo "${shim}" | xargs) - shim_version_path="${parallel_dir}/spark${shim}/rapids4spark-version-info.properties" + shim_version_path=$(find "${parallel_dir}/spark${shim}/" -name "rapids-4-spark*version-info.properties" | head -n 1) + echo "Checking shim: ${shim}" + find "${parallel_dir}/spark${shim}/" -name "*.properties" | xargs printf "%s\n" + # shim_version_path="${parallel_dir}/spark${shim}/rapids-4-spark-sql_2.12-version-info.properties" if [[ -f "$shim_version_path" ]] ; then curr_revision=$(grep "revision=" "${shim_version_path}" | cut -d'=' -f2) if [ -n "$pre_revision" ] && [[ "$curr_revision" != "$pre_revision" ]] ; then diff --git a/dist/unshimmed-common-from-spark311.txt b/dist/unshimmed-common-from-spark311.txt index 772360b1b37..bfe3f568dc5 100644 --- a/dist/unshimmed-common-from-spark311.txt +++ b/dist/unshimmed-common-from-spark311.txt @@ -16,4 +16,4 @@ com/nvidia/spark/rapids/optimizer/SQLOptimizerPlugin* org/apache/spark/sql/rapids/AdaptiveSparkPlanHelperShim* org/apache/spark/sql/rapids/ExecutionPlanCaptureCallback* rapids/*.py -rapids4spark-version-info.properties +rapids-4-spark_2.1?-version-info.properties diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index c992b073eae..e145b9b6f69 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -46,12 +46,15 @@ ${spark.rapids.source.basedir}/integration_tests/requirements.txt integration_tests - - ${project.build.directory}/extra-resources/rapids4spark-version-info.properties - integration_tests - + + ${project.build.directory}/extra-resources + integration_tests + + rapids-4-spark-integration-tests_2.1*-version-info.properties + + ${spark.rapids.source.basedir}/integration_tests/src/main/python integration_tests/src/main/python diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh index 17cf4c747f7..a5ef4023e01 100755 --- a/jenkins/spark-tests.sh +++ b/jenkins/spark-tests.sh @@ -78,11 +78,11 @@ getRevision() { set +x echo -e "\n==================== ARTIFACTS BUILD INFO ====================\n" >> "$tmp_info" echo "-------------------- rapids-4-spark BUILD INFO --------------------" >> "$tmp_info" -p_ver=$(getRevision $RAPIDS_PLUGIN_JAR rapids4spark-version-info.properties) +p_ver=$(getRevision $RAPIDS_PLUGIN_JAR rapids-4-spark_2.12-version-info.properties) echo "-------------------- rapids-4-spark-integration-tests BUILD INFO --------------------" >> "$tmp_info" -it_ver=$(getRevision $RAPIDS_TEST_JAR rapids4spark-version-info.properties) +it_ver=$(getRevision $RAPIDS_TEST_JAR rapids-4-spark-integration-tests_2.12-version-info.properties) echo "-------------------- rapids-4-spark-integration-tests pytest BUILD INFO --------------------" >> "$tmp_info" -pt_ver=$(getRevision $RAPIDS_INT_TESTS_TGZ integration_tests/rapids4spark-version-info.properties) +pt_ver=$(getRevision $RAPIDS_INT_TESTS_TGZ integration_tests/rapids-4-spark-integration-tests_2.12-version-info.properties) echo -e "\n==================== ARTIFACTS BUILD INFO ====================\n" >> "$tmp_info" set -x cat "$tmp_info" || true diff --git a/pom.xml b/pom.xml index bf0a18385f1..572735362fd 100644 --- a/pom.xml +++ b/pom.xml @@ -920,8 +920,9 @@ + diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 34531329ef2..89fb9a29d67 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -921,7 +921,7 @@ diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index a37c617a401..badf244dfd9 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -55,7 +55,7 @@ case class ColumnarOverrideRules() extends ColumnarRule with Logging { object RapidsPluginUtils extends Logging { val CUDF_PROPS_FILENAME = "cudf-java-version-info.properties" val JNI_PROPS_FILENAME = "spark-rapids-jni-version-info.properties" - val PLUGIN_PROPS_FILENAME = "rapids4spark-version-info.properties" + val PLUGIN_PROPS_FILENAME = "rapids-4-spark_2.12-version-info.properties" private val SQL_PLUGIN_NAME = classOf[SQLExecPlugin].getName private val UDF_PLUGIN_NAME = "com.nvidia.spark.udf.Plugin" From c2a04025f5b8ee3359ec2f79f29856f8b6934db4 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Mon, 13 Nov 2023 11:43:59 +0800 Subject: [PATCH 11/31] Use unique properties for intermediate jars Signed-off-by: Haoyang Li --- dist/scripts/check-shims-revisions.sh | 3 --- pom.xml | 1 - .../src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 4 ++-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/dist/scripts/check-shims-revisions.sh b/dist/scripts/check-shims-revisions.sh index 6e97e7635c3..9e258e00820 100755 --- a/dist/scripts/check-shims-revisions.sh +++ b/dist/scripts/check-shims-revisions.sh @@ -32,9 +32,6 @@ function check-shims-revisions() { # trim shim=$(echo "${shim}" | xargs) shim_version_path=$(find "${parallel_dir}/spark${shim}/" -name "rapids-4-spark*version-info.properties" | head -n 1) - echo "Checking shim: ${shim}" - find "${parallel_dir}/spark${shim}/" -name "*.properties" | xargs printf "%s\n" - # shim_version_path="${parallel_dir}/spark${shim}/rapids-4-spark-sql_2.12-version-info.properties" if [[ -f "$shim_version_path" ]] ; then curr_revision=$(grep "revision=" "${shim_version_path}" | cut -d'=' -f2) if [ -n "$pre_revision" ] && [[ "$curr_revision" != "$pre_revision" ]] ; then diff --git a/pom.xml b/pom.xml index 084acf06119..566e6698a16 100644 --- a/pom.xml +++ b/pom.xml @@ -974,7 +974,6 @@ - Date: Mon, 13 Nov 2023 12:26:36 +0800 Subject: [PATCH 12/31] clean up Signed-off-by: Haoyang Li --- dist/scripts/check-shims-revisions.sh | 2 +- scala2.13/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dist/scripts/check-shims-revisions.sh b/dist/scripts/check-shims-revisions.sh index 9e258e00820..9b40ef1dbc2 100755 --- a/dist/scripts/check-shims-revisions.sh +++ b/dist/scripts/check-shims-revisions.sh @@ -26,7 +26,7 @@ function check-shims-revisions() { parallel_dir=${PWD}/parallel-world pre_revision="" pre_shim_version_path="" - + IFS="," for shim in ${included_buildvers}; do # trim diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index de76a4e5740..ce9a83d76b6 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -975,7 +975,7 @@ From b0993586e4927ce007f47a31b713ba2383d69b90 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Mon, 13 Nov 2023 19:15:05 +0800 Subject: [PATCH 13/31] address comment Signed-off-by: Haoyang Li --- .../com/nvidia/spark/rapids/Plugin.scala | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 54b97c0e55a..9ec89804d35 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -112,29 +112,31 @@ object RapidsPluginUtils extends Logging { } } - private def detectMultipleJar(propName: String, jarName: String, - complainFun: (Boolean, String) => Unit): Unit = { + private def detectMultipleJar(propName: String, jarName: String, conf: RapidsConf): Unit = { val classloader = ShimLoader.getShimClassLoader() val rapidsJarURLs = classloader.getResources(propName).asScala.toSet lazy val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") lazy val rapidsJarsVers = rapidsJarURLs.map { url => scala.io.Source.fromInputStream(url.openStream()).mkString("") }.mkString(",") - complainFun(rapidsJarURLs.size <= 1, - s"Multiple $jarName jars found in the classpath: $rapidsJars, please make sure there " + - s"is only one $jarName jar in the classpath. If it is impossible to fix the classpath " + - s"you can suppress the error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key} to true. " + - s"Version info: \n$rapidsJarsVers") + lazy val msg = s"Multiple $jarName jars found in the classpath: $rapidsJars, please make " + + s"sure there is only one $jarName jar in the classpath. If it is impossible to fix the " + + s"classpath you can suppress the error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key}" + + s"to true. Version info: \n$rapidsJarsVers" + conf.allowMultipleJars match { + case true => { + if (rapidsJarURLs.size > 1) { + logWarning(msg) + } + } + case false => require(rapidsJarURLs.size <= 1, msg) + } } def detectMultipleJars(conf: RapidsConf): Unit = { - val complainFun: (Boolean, String) => Unit = conf.allowMultipleJars match { - case true => (request: Boolean, msg: String) => if (!request) logWarning(msg) - case false => (request: Boolean, msg: String) => require(request, msg) - } - detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids4spark", complainFun) - detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", complainFun) - detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", complainFun) + detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids4spark", conf) + detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", conf) + detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", conf) } // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need From 74010e0fedf3a7412997e127d7080f2676d92a95 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 14 Nov 2023 09:03:42 +0800 Subject: [PATCH 14/31] Apply suggestions from code review Co-authored-by: Gera Shegalov --- integration_tests/src/assembly/bin.xml | 2 +- sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index e145b9b6f69..9e8091521d9 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -52,7 +52,7 @@ ${project.build.directory}/extra-resources integration_tests - rapids-4-spark-integration-tests_2.1*-version-info.properties + ${project.artifactId}-version-info.properties diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 9ec89804d35..9301e9b3be0 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -134,7 +134,7 @@ object RapidsPluginUtils extends Logging { } def detectMultipleJars(conf: RapidsConf): Unit = { - detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids4spark", conf) + detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids-4-spark", conf) detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", conf) detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", conf) } From fd129c90556f6718830ae6ecdfc75b2bedc57ca4 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 14 Nov 2023 09:39:13 +0800 Subject: [PATCH 15/31] address comments Signed-off-by: Haoyang Li --- dist/scripts/check-shims-revisions.sh | 3 ++- jenkins/spark-tests.sh | 6 +++--- .../scala/com/nvidia/spark/rapids/Plugin.scala | 18 ++++++++++++------ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/dist/scripts/check-shims-revisions.sh b/dist/scripts/check-shims-revisions.sh index 9b40ef1dbc2..e3eb9862e54 100755 --- a/dist/scripts/check-shims-revisions.sh +++ b/dist/scripts/check-shims-revisions.sh @@ -31,7 +31,8 @@ function check-shims-revisions() { for shim in ${included_buildvers}; do # trim shim=$(echo "${shim}" | xargs) - shim_version_path=$(find "${parallel_dir}/spark${shim}/" -name "rapids-4-spark*version-info.properties" | head -n 1) + shim_version_files=("${parallel_dir}/spark${shim}"/rapids-4-spark*version-info.properties ) + shim_version_path="${shim_version_files[0]}" if [[ -f "$shim_version_path" ]] ; then curr_revision=$(grep "revision=" "${shim_version_path}" | cut -d'=' -f2) if [ -n "$pre_revision" ] && [[ "$curr_revision" != "$pre_revision" ]] ; then diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh index a5ef4023e01..493ad90d1c0 100755 --- a/jenkins/spark-tests.sh +++ b/jenkins/spark-tests.sh @@ -78,11 +78,11 @@ getRevision() { set +x echo -e "\n==================== ARTIFACTS BUILD INFO ====================\n" >> "$tmp_info" echo "-------------------- rapids-4-spark BUILD INFO --------------------" >> "$tmp_info" -p_ver=$(getRevision $RAPIDS_PLUGIN_JAR rapids-4-spark_2.12-version-info.properties) +p_ver=$(getRevision $RAPIDS_PLUGIN_JAR rapids-4-spark_${SCALA_BINARY_VER}-version-info.properties) echo "-------------------- rapids-4-spark-integration-tests BUILD INFO --------------------" >> "$tmp_info" -it_ver=$(getRevision $RAPIDS_TEST_JAR rapids-4-spark-integration-tests_2.12-version-info.properties) +it_ver=$(getRevision $RAPIDS_TEST_JAR rapids-4-spark-integration-tests_${SCALA_BINARY_VER}-version-info.properties) echo "-------------------- rapids-4-spark-integration-tests pytest BUILD INFO --------------------" >> "$tmp_info" -pt_ver=$(getRevision $RAPIDS_INT_TESTS_TGZ integration_tests/rapids-4-spark-integration-tests_2.12-version-info.properties) +pt_ver=$(getRevision $RAPIDS_INT_TESTS_TGZ integration_tests/rapids-4-spark-integration-tests_${SCALA_BINARY_VER}-version-info.properties) echo -e "\n==================== ARTIFACTS BUILD INFO ====================\n" >> "$tmp_info" set -x cat "$tmp_info" || true diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 9301e9b3be0..55cd0d5c6f4 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -56,7 +56,7 @@ object RapidsPluginUtils extends Logging { val CUDF_PROPS_FILENAME = "cudf-java-version-info.properties" val JNI_PROPS_FILENAME = "spark-rapids-jni-version-info.properties" val SCALA_VERSION = scala.util.Properties.versionNumberString.split("\\.").take(2).mkString(".") - val PLUGIN_PROPS_FILENAME = "rapids-4-spark_" + SCALA_VERSION + "-version-info.properties" + val PLUGIN_PROPS_FILENAME = s"rapids-4-spark_$SCALA_VERSION-version-info.properties" private val SQL_PLUGIN_NAME = classOf[SQLExecPlugin].getName private val UDF_PLUGIN_NAME = "com.nvidia.spark.udf.Plugin" @@ -112,9 +112,11 @@ object RapidsPluginUtils extends Logging { } } - private def detectMultipleJar(propName: String, jarName: String, conf: RapidsConf): Unit = { + private def detectMultipleJar(propNames: Seq[String], jarName: String, conf: RapidsConf): Unit = { val classloader = ShimLoader.getShimClassLoader() - val rapidsJarURLs = classloader.getResources(propName).asScala.toSet + val rapidsJarURLs = propNames.flatMap(propName => + classloader.getResources(propName).asScala + ).toSet lazy val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") lazy val rapidsJarsVers = rapidsJarURLs.map { url => scala.io.Source.fromInputStream(url.openStream()).mkString("") @@ -134,9 +136,13 @@ object RapidsPluginUtils extends Logging { } def detectMultipleJars(conf: RapidsConf): Unit = { - detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids-4-spark", conf) - detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", conf) - detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", conf) + val propsName212 = "rapids-4-spark_2.12-version-info.properties" + val propsName213 = "rapids-4-spark_2.13-version-info.properties" + val propsNameOld = "rapids4spark-version-info.properties" + val possibleProps = Seq(propsName212, propsName213, propsNameOld) + detectMultipleJar(possibleProps, "rapids-4-spark", conf) + detectMultipleJar(Seq(JNI_PROPS_FILENAME), "spark-rapids-jni", conf) + detectMultipleJar(Seq(CUDF_PROPS_FILENAME), "cudf", conf) } // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need From 5e292c4f8b6134f787be630e725fce337b317540 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 14 Nov 2023 15:58:59 +0800 Subject: [PATCH 16/31] add the project.artifactId to build-info and check it Signed-off-by: Haoyang Li --- build/build-info | 5 +- dist/scripts/check-shims-revisions.sh | 3 +- dist/unshimmed-common-from-spark311.txt | 2 +- integration_tests/src/assembly/bin.xml | 11 ++-- jenkins/spark-tests.sh | 6 +- pom.xml | 3 +- scala2.13/pom.xml | 2 +- .../com/nvidia/spark/rapids/Plugin.scala | 56 ++++++++++--------- 8 files changed, 46 insertions(+), 42 deletions(-) diff --git a/build/build-info b/build/build-info index 88cd8516d13..aa981273a49 100755 --- a/build/build-info +++ b/build/build-info @@ -1,7 +1,7 @@ #!/usr/bin/env bash # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ set -e echo_build_properties() { echo version=$1 echo cudf_version=$2 + echo project_name=$3 echo user=$USER echo revision=$(git rev-parse HEAD) echo branch=$(git rev-parse --abbrev-ref HEAD) @@ -31,4 +32,4 @@ echo_build_properties() { echo url=$(git config --get remote.origin.url) } -echo_build_properties $1 $2 +echo_build_properties $1 $2 $3 diff --git a/dist/scripts/check-shims-revisions.sh b/dist/scripts/check-shims-revisions.sh index e3eb9862e54..b3b93099a42 100755 --- a/dist/scripts/check-shims-revisions.sh +++ b/dist/scripts/check-shims-revisions.sh @@ -31,8 +31,7 @@ function check-shims-revisions() { for shim in ${included_buildvers}; do # trim shim=$(echo "${shim}" | xargs) - shim_version_files=("${parallel_dir}/spark${shim}"/rapids-4-spark*version-info.properties ) - shim_version_path="${shim_version_files[0]}" + shim_version_path="${parallel_dir}/spark${shim}/rapids4spark-version-info.properties" if [[ -f "$shim_version_path" ]] ; then curr_revision=$(grep "revision=" "${shim_version_path}" | cut -d'=' -f2) if [ -n "$pre_revision" ] && [[ "$curr_revision" != "$pre_revision" ]] ; then diff --git a/dist/unshimmed-common-from-spark311.txt b/dist/unshimmed-common-from-spark311.txt index bfe3f568dc5..772360b1b37 100644 --- a/dist/unshimmed-common-from-spark311.txt +++ b/dist/unshimmed-common-from-spark311.txt @@ -16,4 +16,4 @@ com/nvidia/spark/rapids/optimizer/SQLOptimizerPlugin* org/apache/spark/sql/rapids/AdaptiveSparkPlanHelperShim* org/apache/spark/sql/rapids/ExecutionPlanCaptureCallback* rapids/*.py -rapids-4-spark_2.1?-version-info.properties +rapids4spark-version-info.properties diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index 9e8091521d9..c992b073eae 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -46,15 +46,12 @@ ${spark.rapids.source.basedir}/integration_tests/requirements.txt integration_tests + + ${project.build.directory}/extra-resources/rapids4spark-version-info.properties + integration_tests + - - ${project.build.directory}/extra-resources - integration_tests - - ${project.artifactId}-version-info.properties - - ${spark.rapids.source.basedir}/integration_tests/src/main/python integration_tests/src/main/python diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh index 493ad90d1c0..17cf4c747f7 100755 --- a/jenkins/spark-tests.sh +++ b/jenkins/spark-tests.sh @@ -78,11 +78,11 @@ getRevision() { set +x echo -e "\n==================== ARTIFACTS BUILD INFO ====================\n" >> "$tmp_info" echo "-------------------- rapids-4-spark BUILD INFO --------------------" >> "$tmp_info" -p_ver=$(getRevision $RAPIDS_PLUGIN_JAR rapids-4-spark_${SCALA_BINARY_VER}-version-info.properties) +p_ver=$(getRevision $RAPIDS_PLUGIN_JAR rapids4spark-version-info.properties) echo "-------------------- rapids-4-spark-integration-tests BUILD INFO --------------------" >> "$tmp_info" -it_ver=$(getRevision $RAPIDS_TEST_JAR rapids-4-spark-integration-tests_${SCALA_BINARY_VER}-version-info.properties) +it_ver=$(getRevision $RAPIDS_TEST_JAR rapids4spark-version-info.properties) echo "-------------------- rapids-4-spark-integration-tests pytest BUILD INFO --------------------" >> "$tmp_info" -pt_ver=$(getRevision $RAPIDS_INT_TESTS_TGZ integration_tests/rapids-4-spark-integration-tests_${SCALA_BINARY_VER}-version-info.properties) +pt_ver=$(getRevision $RAPIDS_INT_TESTS_TGZ integration_tests/rapids4spark-version-info.properties) echo -e "\n==================== ARTIFACTS BUILD INFO ====================\n" >> "$tmp_info" set -x cat "$tmp_info" || true diff --git a/pom.xml b/pom.xml index 566e6698a16..96a97ebebda 100644 --- a/pom.xml +++ b/pom.xml @@ -975,13 +975,14 @@ + diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index ce9a83d76b6..629692d6e65 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -975,7 +975,7 @@ diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 55cd0d5c6f4..3b4f9c9fda2 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -55,8 +55,7 @@ case class ColumnarOverrideRules() extends ColumnarRule with Logging { object RapidsPluginUtils extends Logging { val CUDF_PROPS_FILENAME = "cudf-java-version-info.properties" val JNI_PROPS_FILENAME = "spark-rapids-jni-version-info.properties" - val SCALA_VERSION = scala.util.Properties.versionNumberString.split("\\.").take(2).mkString(".") - val PLUGIN_PROPS_FILENAME = s"rapids-4-spark_$SCALA_VERSION-version-info.properties" + val PLUGIN_PROPS_FILENAME = "rapids4spark-version-info.properties" private val SQL_PLUGIN_NAME = classOf[SQLExecPlugin].getName private val UDF_PLUGIN_NAME = "com.nvidia.spark.udf.Plugin" @@ -71,6 +70,10 @@ object RapidsPluginUtils extends Logging { private val EXECUTOR_GPU_AMOUNT_KEY = "spark.executor.resource.gpu.amount" private val SPARK_MASTER = "spark.master" + private val pluginPropsNames = Seq("rapids-4-spark-sql_2.12", "rapids-4-spark-sql_2.13") + private val jniPropsNames = Seq("spark-rapids-jni") + private val cudfPropsNames = Seq("cudf") + { val pluginProps = loadProps(RapidsPluginUtils.PLUGIN_PROPS_FILENAME) logInfo(s"RAPIDS Accelerator build: $pluginProps") @@ -112,37 +115,40 @@ object RapidsPluginUtils extends Logging { } } - private def detectMultipleJar(propNames: Seq[String], jarName: String, conf: RapidsConf): Unit = { + private def detectMultipleJar(propName: String, jarName: String, propsName: Seq[String], + conf: RapidsConf): Unit = { val classloader = ShimLoader.getShimClassLoader() - val rapidsJarURLs = propNames.flatMap(propName => - classloader.getResources(propName).asScala - ).toSet - lazy val rapidsJars = rapidsJarURLs.map(_.toString.split("!").head).mkString(",") - lazy val rapidsJarsVers = rapidsJarURLs.map { - url => scala.io.Source.fromInputStream(url.openStream()).mkString("") - }.mkString(",") - lazy val msg = s"Multiple $jarName jars found in the classpath: $rapidsJars, please make " + + val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet + val (rapidsJarsURLs, rapidsJarsVers) = possibleRapidsJarURLs.flatMap { + url => { + val ver = scala.io.Source.fromInputStream(url.openStream()).mkString("") + println(ver) + if (propsName.exists(ver.contains(_))) { + Some((url, ver)) + } else { + None + } + } + }.unzip + lazy val rapidsJarsMsg = rapidsJarsURLs.toList.map(_.toString.split("!").head).mkString(",") + lazy val rapidsJarsVersMsg = rapidsJarsVers.toList.mkString(",") + lazy val msg = s"Multiple $jarName jars found in the classpath: $rapidsJarsMsg, please make " + s"sure there is only one $jarName jar in the classpath. If it is impossible to fix the " + s"classpath you can suppress the error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key}" + - s"to true. Version info: \n$rapidsJarsVers" - conf.allowMultipleJars match { - case true => { - if (rapidsJarURLs.size > 1) { - logWarning(msg) - } + s"to true. Version info: \n$rapidsJarsVersMsg" + if (conf.allowMultipleJars) { + if (rapidsJarsURLs.size > 1) { + logWarning(msg) } - case false => require(rapidsJarURLs.size <= 1, msg) + } else { + require(rapidsJarsURLs.size <= 1, msg) } } def detectMultipleJars(conf: RapidsConf): Unit = { - val propsName212 = "rapids-4-spark_2.12-version-info.properties" - val propsName213 = "rapids-4-spark_2.13-version-info.properties" - val propsNameOld = "rapids4spark-version-info.properties" - val possibleProps = Seq(propsName212, propsName213, propsNameOld) - detectMultipleJar(possibleProps, "rapids-4-spark", conf) - detectMultipleJar(Seq(JNI_PROPS_FILENAME), "spark-rapids-jni", conf) - detectMultipleJar(Seq(CUDF_PROPS_FILENAME), "cudf", conf) + detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids-4-spark", pluginPropsNames, conf) + detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", jniPropsNames, conf) + detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", cudfPropsNames, conf) } // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need From 15022d733d334611e7fa162c5172e682282c3f05 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 14 Nov 2023 16:42:24 +0800 Subject: [PATCH 17/31] remove unnecessary copyright update Signed-off-by: Haoyang Li --- dist/scripts/check-shims-revisions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/scripts/check-shims-revisions.sh b/dist/scripts/check-shims-revisions.sh index b3b93099a42..8a9e422b2e1 100755 --- a/dist/scripts/check-shims-revisions.sh +++ b/dist/scripts/check-shims-revisions.sh @@ -1,7 +1,7 @@ #!/bin/bash # -# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 28c8dcd68f624c4696cbb7532cfc891c04956028 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 14 Nov 2023 16:55:38 +0800 Subject: [PATCH 18/31] remove log Signed-off-by: Haoyang Li --- sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 3b4f9c9fda2..fa12c9e9a12 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -122,7 +122,6 @@ object RapidsPluginUtils extends Logging { val (rapidsJarsURLs, rapidsJarsVers) = possibleRapidsJarURLs.flatMap { url => { val ver = scala.io.Source.fromInputStream(url.openStream()).mkString("") - println(ver) if (propsName.exists(ver.contains(_))) { Some((url, ver)) } else { From 3dc12e49ffb194eff7dcc0d00cde3d81f17d01e1 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 14 Nov 2023 17:37:32 +0800 Subject: [PATCH 19/31] Add 2.13 support Signed-off-by: Haoyang Li --- scala2.13/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 629692d6e65..d300dd7ba4d 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -982,6 +982,7 @@ + From 14fe6fc25a384f8044d1848ba71b6bd366fa8ddf Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Wed, 15 Nov 2023 15:40:02 +0800 Subject: [PATCH 20/31] use revision to check duplicate jars Signed-off-by: Haoyang Li --- build/build-info | 5 +- pom.xml | 1 - scala2.13/pom.xml | 1 - .../com/nvidia/spark/rapids/Plugin.scala | 59 ++++++++++--------- 4 files changed, 34 insertions(+), 32 deletions(-) diff --git a/build/build-info b/build/build-info index aa981273a49..88cd8516d13 100755 --- a/build/build-info +++ b/build/build-info @@ -1,7 +1,7 @@ #!/usr/bin/env bash # -# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,6 @@ set -e echo_build_properties() { echo version=$1 echo cudf_version=$2 - echo project_name=$3 echo user=$USER echo revision=$(git rev-parse HEAD) echo branch=$(git rev-parse --abbrev-ref HEAD) @@ -32,4 +31,4 @@ echo_build_properties() { echo url=$(git config --get remote.origin.url) } -echo_build_properties $1 $2 $3 +echo_build_properties $1 $2 diff --git a/pom.xml b/pom.xml index 96a97ebebda..afb519ffc03 100644 --- a/pom.xml +++ b/pom.xml @@ -982,7 +982,6 @@ - diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index d300dd7ba4d..629692d6e65 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -982,7 +982,6 @@ - diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index fa12c9e9a12..af30477cbe7 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -17,6 +17,7 @@ package com.nvidia.spark.rapids import java.lang.reflect.InvocationTargetException +import java.net.URL import java.time.ZoneId import java.util.Properties @@ -70,10 +71,6 @@ object RapidsPluginUtils extends Logging { private val EXECUTOR_GPU_AMOUNT_KEY = "spark.executor.resource.gpu.amount" private val SPARK_MASTER = "spark.master" - private val pluginPropsNames = Seq("rapids-4-spark-sql_2.12", "rapids-4-spark-sql_2.13") - private val jniPropsNames = Seq("spark-rapids-jni") - private val cudfPropsNames = Seq("cudf") - { val pluginProps = loadProps(RapidsPluginUtils.PLUGIN_PROPS_FILENAME) logInfo(s"RAPIDS Accelerator build: $pluginProps") @@ -115,39 +112,47 @@ object RapidsPluginUtils extends Logging { } } - private def detectMultipleJar(propName: String, jarName: String, propsName: Seq[String], - conf: RapidsConf): Unit = { + private def getRevisionFromURL(jarURL: URL): String = { + scala.io.Source.fromURL(jarURL).getLines().toSeq + .filter(_.startsWith("revision=")) + .map(_.split("=").last) + .headOption + .getOrElse("UNKNOWN") + } + + private def detectMultipleJar(propFileName: String, jarName: String, conf: RapidsConf): Unit = { val classloader = ShimLoader.getShimClassLoader() - val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet - val (rapidsJarsURLs, rapidsJarsVers) = possibleRapidsJarURLs.flatMap { - url => { - val ver = scala.io.Source.fromInputStream(url.openStream()).mkString("") - if (propsName.exists(ver.contains(_))) { - Some((url, ver)) - } else { - None - } + val possibleRapidsJarURLs = classloader.getResources(propFileName).asScala.toList + + val revisionMap: Map[String, Seq[URL]] = possibleRapidsJarURLs.map { url => + val revision = getRevisionFromURL(url) + (revision, url) + }.groupBy(_._1).mapValues(_.map(_._2)) + lazy val rapidsJarsVersMsg = revisionMap.map { + case (revision, urls) => { + s"revison: $revision\n" + urls.map { + url => "jar URL: " + url.toString.split("!").head + "\n" + + scala.io.Source.fromInputStream(url.openStream()).mkString("") + }.mkString } - }.unzip - lazy val rapidsJarsMsg = rapidsJarsURLs.toList.map(_.toString.split("!").head).mkString(",") - lazy val rapidsJarsVersMsg = rapidsJarsVers.toList.mkString(",") - lazy val msg = s"Multiple $jarName jars found in the classpath: $rapidsJarsMsg, please make " + - s"sure there is only one $jarName jar in the classpath. If it is impossible to fix the " + - s"classpath you can suppress the error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key}" + - s"to true. Version info: \n$rapidsJarsVersMsg" + }.mkString + lazy val msg = s"Multiple $jarName jars with different revision found in the classpath:\n" + + s"$rapidsJarsVersMsg \nPlease make sure there is only one $jarName jar in the classpath." + + s" If it is impossible to fix the classpath you can suppress the error by setting " + + s"${RapidsConf.ALLOW_MULTIPLE_JARS.key} to true." if (conf.allowMultipleJars) { - if (rapidsJarsURLs.size > 1) { + if (revisionMap.size > 1) { logWarning(msg) } } else { - require(rapidsJarsURLs.size <= 1, msg) + require(revisionMap.size == 1, msg) } } def detectMultipleJars(conf: RapidsConf): Unit = { - detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids-4-spark", pluginPropsNames, conf) - detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", jniPropsNames, conf) - detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", cudfPropsNames, conf) + detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids-4-spark", conf) + detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", conf) + detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", conf) } // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need From 5dcc15f67e6876f798f6b0d9aab19c8bc5db9118 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Wed, 15 Nov 2023 16:27:20 +0800 Subject: [PATCH 21/31] fix 2.13 build Signed-off-by: Haoyang Li --- .../src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index af30477cbe7..d1803f7a311 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -122,12 +122,12 @@ object RapidsPluginUtils extends Logging { private def detectMultipleJar(propFileName: String, jarName: String, conf: RapidsConf): Unit = { val classloader = ShimLoader.getShimClassLoader() - val possibleRapidsJarURLs = classloader.getResources(propFileName).asScala.toList + val possibleRapidsJarURLs = classloader.getResources(propFileName).asScala.toSeq val revisionMap: Map[String, Seq[URL]] = possibleRapidsJarURLs.map { url => val revision = getRevisionFromURL(url) (revision, url) - }.groupBy(_._1).mapValues(_.map(_._2)) + }.groupBy(_._1).mapValues(_.map(_._2)).toMap lazy val rapidsJarsVersMsg = revisionMap.map { case (revision, urls) => { s"revison: $revision\n" + urls.map { From 08f408808374b5ff07b7077cf89629d816a611d6 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Wed, 22 Nov 2023 16:36:12 +0800 Subject: [PATCH 22/31] support both SAME_REVISION and NEVER mode Signed-off-by: Haoyang Li --- build/build-info | 5 +- pom.xml | 1 + scala2.13/pom.xml | 1 + .../com/nvidia/spark/rapids/Plugin.scala | 64 +++++++++++-------- .../com/nvidia/spark/rapids/RapidsConf.scala | 10 +-- 5 files changed, 49 insertions(+), 32 deletions(-) diff --git a/build/build-info b/build/build-info index 88cd8516d13..aa981273a49 100755 --- a/build/build-info +++ b/build/build-info @@ -1,7 +1,7 @@ #!/usr/bin/env bash # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ set -e echo_build_properties() { echo version=$1 echo cudf_version=$2 + echo project_name=$3 echo user=$USER echo revision=$(git rev-parse HEAD) echo branch=$(git rev-parse --abbrev-ref HEAD) @@ -31,4 +32,4 @@ echo_build_properties() { echo url=$(git config --get remote.origin.url) } -echo_build_properties $1 $2 +echo_build_properties $1 $2 $3 diff --git a/pom.xml b/pom.xml index afb519ffc03..96a97ebebda 100644 --- a/pom.xml +++ b/pom.xml @@ -982,6 +982,7 @@ + diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 629692d6e65..d300dd7ba4d 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -982,6 +982,7 @@ + diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index d1803f7a311..bee790d8d09 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -71,6 +71,10 @@ object RapidsPluginUtils extends Logging { private val EXECUTOR_GPU_AMOUNT_KEY = "spark.executor.resource.gpu.amount" private val SPARK_MASTER = "spark.master" + private val pluginPropsNames = Seq("rapids-4-spark-sql_2.12", "rapids-4-spark-sql_2.13") + private val jniPropsNames = Seq("spark-rapids-jni") + private val cudfPropsNames = Seq("cudf") + { val pluginProps = loadProps(RapidsPluginUtils.PLUGIN_PROPS_FILENAME) logInfo(s"RAPIDS Accelerator build: $pluginProps") @@ -112,21 +116,23 @@ object RapidsPluginUtils extends Logging { } } - private def getRevisionFromURL(jarURL: URL): String = { - scala.io.Source.fromURL(jarURL).getLines().toSeq - .filter(_.startsWith("revision=")) - .map(_.split("=").last) - .headOption - .getOrElse("UNKNOWN") - } - - private def detectMultipleJar(propFileName: String, jarName: String, conf: RapidsConf): Unit = { + private def detectMultipleJar(propName: String, jarName: String, propsName: Seq[String], + conf: RapidsConf): Unit = { val classloader = ShimLoader.getShimClassLoader() - val possibleRapidsJarURLs = classloader.getResources(propFileName).asScala.toSeq - - val revisionMap: Map[String, Seq[URL]] = possibleRapidsJarURLs.map { url => - val revision = getRevisionFromURL(url) - (revision, url) + val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet.toSeq + val revisionMap: Map[String, Seq[URL]] = possibleRapidsJarURLs.flatMap { url => + val versionInfo = scala.io.Source.fromURL(url).getLines().toSeq + lazy val revision = versionInfo + .filter(_.startsWith("revision=")) + .map(_.split("=").last) + .headOption + .getOrElse("UNKNOWN") + val ver = versionInfo.mkString("") + if (propsName.exists(ver.contains(_))) { + Some((revision, url)) + } else { + None + } }.groupBy(_._1).mapValues(_.map(_._2)).toMap lazy val rapidsJarsVersMsg = revisionMap.map { case (revision, urls) => { @@ -136,23 +142,29 @@ object RapidsPluginUtils extends Logging { }.mkString } }.mkString - lazy val msg = s"Multiple $jarName jars with different revision found in the classpath:\n" + - s"$rapidsJarsVersMsg \nPlease make sure there is only one $jarName jar in the classpath." + - s" If it is impossible to fix the classpath you can suppress the error by setting " + - s"${RapidsConf.ALLOW_MULTIPLE_JARS.key} to true." - if (conf.allowMultipleJars) { - if (revisionMap.size > 1) { + lazy val msg = s"Multiple $jarName jars found in the classpath:\n $rapidsJarsVersMsg " + + s"\nPlease make sure there is only one $jarName jar in the classpath. " + + s"If it is impossible to fix the classpath you can suppress the error by setting " + + s"${RapidsConf.ALLOW_MULTIPLE_JARS.key} to SAME_REVISION or ALWAYS." + + conf.allowMultipleJars match { + case "ALWAYS" => logWarning(msg) - } - } else { - require(revisionMap.size == 1, msg) + case "SAME_REVISION" => + require(revisionMap.size == 1, msg) + case "NEVER" => + require(revisionMap.size == 1 && revisionMap.values.forall(_.size == 1), msg) + case _ => + throw new IllegalArgumentException(s"Invalid value for " + + s"${RapidsConf.ALLOW_MULTIPLE_JARS.key}: ${conf.allowMultipleJars}. " + + s"Valid values are ALWAYS, SAME_REVISION, NEVER.") } } def detectMultipleJars(conf: RapidsConf): Unit = { - detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids-4-spark", conf) - detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", conf) - detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", conf) + detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids-4-spark", pluginPropsNames, conf) + detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", jniPropsNames, conf) + detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", cudfPropsNames, conf) } // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 119107d1c1c..631fb1a840f 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -1843,9 +1843,11 @@ object RapidsConf { .internal() .startupOnly() .doc("Allow multiple rapids-4-spark, spark-rapids-jni, and cudf jars on the classpath. " + - "Spark will take the first one it finds, so the version may not be expected. ") - .booleanConf - .createWithDefault(false) + "Spark will take the first one it finds, so the version may not be expected. Possisble " + + "values are ALWAYS: allow all jars, SAME_REVISION: only allow jars with the same " + + "revision, NEVER: do not allow multiple jars at all.") + .stringConf + .createWithDefault("SAME_REVISION") val ALLOW_DISABLE_ENTIRE_PLAN = conf("spark.rapids.allowDisableEntirePlan") .internal() @@ -2642,7 +2644,7 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val cudfVersionOverride: Boolean = get(CUDF_VERSION_OVERRIDE) - lazy val allowMultipleJars: Boolean = get(ALLOW_MULTIPLE_JARS) + lazy val allowMultipleJars: String = get(ALLOW_MULTIPLE_JARS) lazy val allowDisableEntirePlan: Boolean = get(ALLOW_DISABLE_ENTIRE_PLAN) From 0ca06b55382e62c9144c44b155cc2611c9363e3b Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Wed, 22 Nov 2023 20:43:12 +0800 Subject: [PATCH 23/31] Avoid CI change and filter out test Signed-off-by: Haoyang Li --- build/build-info | 5 +- pom.xml | 1 - scala2.13/pom.xml | 1 - .../com/nvidia/spark/rapids/Plugin.scala | 47 +++++++++---------- 4 files changed, 23 insertions(+), 31 deletions(-) diff --git a/build/build-info b/build/build-info index aa981273a49..88cd8516d13 100755 --- a/build/build-info +++ b/build/build-info @@ -1,7 +1,7 @@ #!/usr/bin/env bash # -# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,6 @@ set -e echo_build_properties() { echo version=$1 echo cudf_version=$2 - echo project_name=$3 echo user=$USER echo revision=$(git rev-parse HEAD) echo branch=$(git rev-parse --abbrev-ref HEAD) @@ -32,4 +31,4 @@ echo_build_properties() { echo url=$(git config --get remote.origin.url) } -echo_build_properties $1 $2 $3 +echo_build_properties $1 $2 diff --git a/pom.xml b/pom.xml index 96a97ebebda..afb519ffc03 100644 --- a/pom.xml +++ b/pom.xml @@ -982,7 +982,6 @@ - diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index d300dd7ba4d..629692d6e65 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -982,7 +982,6 @@ - diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index bee790d8d09..b2e32beed43 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -71,10 +71,6 @@ object RapidsPluginUtils extends Logging { private val EXECUTOR_GPU_AMOUNT_KEY = "spark.executor.resource.gpu.amount" private val SPARK_MASTER = "spark.master" - private val pluginPropsNames = Seq("rapids-4-spark-sql_2.12", "rapids-4-spark-sql_2.13") - private val jniPropsNames = Seq("spark-rapids-jni") - private val cudfPropsNames = Seq("cudf") - { val pluginProps = loadProps(RapidsPluginUtils.PLUGIN_PROPS_FILENAME) logInfo(s"RAPIDS Accelerator build: $pluginProps") @@ -116,36 +112,35 @@ object RapidsPluginUtils extends Logging { } } - private def detectMultipleJar(propName: String, jarName: String, propsName: Seq[String], - conf: RapidsConf): Unit = { + private def detectMultipleJar(propName: String, jarName: String, conf: RapidsConf): Unit = { val classloader = ShimLoader.getShimClassLoader() - val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet.toSeq - val revisionMap: Map[String, Seq[URL]] = possibleRapidsJarURLs.flatMap { url => + val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet.toSeq.filterNot { + url => url.toString.contains("test") + } + val revisionMap: Map[String, Seq[URL]] = possibleRapidsJarURLs.map { url => val versionInfo = scala.io.Source.fromURL(url).getLines().toSeq lazy val revision = versionInfo .filter(_.startsWith("revision=")) .map(_.split("=").last) .headOption - .getOrElse("UNKNOWN") - val ver = versionInfo.mkString("") - if (propsName.exists(ver.contains(_))) { - Some((revision, url)) - } else { - None - } + .getOrElse("UNKNOWN") + (revision, url) }.groupBy(_._1).mapValues(_.map(_._2)).toMap lazy val rapidsJarsVersMsg = revisionMap.map { case (revision, urls) => { - s"revison: $revision\n" + urls.map { - url => "jar URL: " + url.toString.split("!").head + "\n" + - scala.io.Source.fromInputStream(url.openStream()).mkString("") - }.mkString + s"revison: $revision" + urls.map { + url => "\n\tjar URL: " + url.toString.split("!").head + "\n\t" + + scala.io.Source.fromURL(url).getLines().toSeq.mkString("\n\t") + }.mkString + "\n" } }.mkString - lazy val msg = s"Multiple $jarName jars found in the classpath:\n $rapidsJarsVersMsg " + - s"\nPlease make sure there is only one $jarName jar in the classpath. " + - s"If it is impossible to fix the classpath you can suppress the error by setting " + - s"${RapidsConf.ALLOW_MULTIPLE_JARS.key} to SAME_REVISION or ALWAYS." + // scalastyle:off line.size.limit + lazy val msg = s"""Multiple $jarName jars found in the classpath: + |$rapidsJarsVersMsg + |Please make sure there is only one $jarName jar in the classpath. + |If it is impossible to fix the classpath you can suppress the error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key} to SAME_REVISION or ALWAYS. + """.stripMargin + // scalastyle:on line.size.limit conf.allowMultipleJars match { case "ALWAYS" => @@ -162,9 +157,9 @@ object RapidsPluginUtils extends Logging { } def detectMultipleJars(conf: RapidsConf): Unit = { - detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids-4-spark", pluginPropsNames, conf) - detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", jniPropsNames, conf) - detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", cudfPropsNames, conf) + detectMultipleJar(PLUGIN_PROPS_FILENAME, "rapids-4-spark", conf) + detectMultipleJar(JNI_PROPS_FILENAME, "spark-rapids-jni", conf) + detectMultipleJar(CUDF_PROPS_FILENAME, "cudf", conf) } // This assumes Apache Spark logic, if CSPs are setting defaults differently, we may need From c7ca0add9809381a1adda5fc2c4aff8acc13b94f Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Thu, 23 Nov 2023 11:09:47 +0800 Subject: [PATCH 24/31] check values for config Signed-off-by: Haoyang Li --- .../src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 4 ---- .../src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index b2e32beed43..b74a70823f6 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -149,10 +149,6 @@ object RapidsPluginUtils extends Logging { require(revisionMap.size == 1, msg) case "NEVER" => require(revisionMap.size == 1 && revisionMap.values.forall(_.size == 1), msg) - case _ => - throw new IllegalArgumentException(s"Invalid value for " + - s"${RapidsConf.ALLOW_MULTIPLE_JARS.key}: ${conf.allowMultipleJars}. " + - s"Valid values are ALWAYS, SAME_REVISION, NEVER.") } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 631fb1a840f..7ffcaeb3e7b 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -1847,6 +1847,8 @@ object RapidsConf { "values are ALWAYS: allow all jars, SAME_REVISION: only allow jars with the same " + "revision, NEVER: do not allow multiple jars at all.") .stringConf + .transform(_.toUpperCase(java.util.Locale.ROOT)) + .checkValues(Set("ALWAYS", "SAME_REVISION", "NEVER")) .createWithDefault("SAME_REVISION") val ALLOW_DISABLE_ENTIRE_PLAN = conf("spark.rapids.allowDisableEntirePlan") From a833c73842815152c85a52f0b4c55524f440aaa4 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Thu, 23 Nov 2023 11:22:47 +0800 Subject: [PATCH 25/31] use enum Signed-off-by: Haoyang Li --- .../com/nvidia/spark/rapids/Plugin.scala | 7 ++++--- .../com/nvidia/spark/rapids/RapidsConf.scala | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index b74a70823f6..891b7f2e25f 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -26,6 +26,7 @@ import scala.sys.process._ import scala.util.Try import ai.rapids.cudf.{Cuda, CudaException, CudaFatalException, CudfException, MemoryCleaner} +import com.nvidia.spark.rapids.RapidsConf.AllowMultipleJars import com.nvidia.spark.rapids.filecache.{FileCache, FileCacheLocalityManager, FileCacheLocalityMsg} import com.nvidia.spark.rapids.python.PythonWorkerSemaphore import org.apache.commons.lang3.exception.ExceptionUtils @@ -143,11 +144,11 @@ object RapidsPluginUtils extends Logging { // scalastyle:on line.size.limit conf.allowMultipleJars match { - case "ALWAYS" => + case AllowMultipleJars.ALWAYS => logWarning(msg) - case "SAME_REVISION" => + case AllowMultipleJars.SAME_REVISION => require(revisionMap.size == 1, msg) - case "NEVER" => + case AllowMultipleJars.NEVER => require(revisionMap.size == 1 && revisionMap.values.forall(_.size == 1), msg) } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 7ffcaeb3e7b..a6bcf651e63 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -1839,6 +1839,10 @@ object RapidsConf { .booleanConf .createWithDefault(false) + object AllowMultipleJars extends Enumeration { + val ALWAYS, SAME_REVISION, NEVER = Value + } + val ALLOW_MULTIPLE_JARS = conf("spark.rapids.sql.allowMultipleJars") .internal() .startupOnly() @@ -1848,8 +1852,8 @@ object RapidsConf { "revision, NEVER: do not allow multiple jars at all.") .stringConf .transform(_.toUpperCase(java.util.Locale.ROOT)) - .checkValues(Set("ALWAYS", "SAME_REVISION", "NEVER")) - .createWithDefault("SAME_REVISION") + .checkValues(AllowMultipleJars.values.map(_.toString)) + .createWithDefault(AllowMultipleJars.SAME_REVISION.toString) val ALLOW_DISABLE_ENTIRE_PLAN = conf("spark.rapids.allowDisableEntirePlan") .internal() @@ -2646,7 +2650,16 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val cudfVersionOverride: Boolean = get(CUDF_VERSION_OVERRIDE) - lazy val allowMultipleJars: String = get(ALLOW_MULTIPLE_JARS) + lazy val allowMultipleJars: AllowMultipleJars.Value = { + get(ALLOW_MULTIPLE_JARS) match { + case "ALWAYS" => AllowMultipleJars.ALWAYS + case "NEVER" => AllowMultipleJars.NEVER + case "SAME_REVISION" => AllowMultipleJars.SAME_REVISION + case other => + throw new IllegalArgumentException(s"Internal Error $other is not supported for " + + s"${ALLOW_MULTIPLE_JARS.key}") + } + } lazy val allowDisableEntirePlan: Boolean = get(ALLOW_DISABLE_ENTIRE_PLAN) From f5bd0c958c1afc86a3e7906684809fc62e21d8ab Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Fri, 24 Nov 2023 09:35:53 +0800 Subject: [PATCH 26/31] fix two nits Signed-off-by: Haoyang Li --- .../src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 891b7f2e25f..ad8ed841a8f 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -118,11 +118,13 @@ object RapidsPluginUtils extends Logging { val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet.toSeq.filterNot { url => url.toString.contains("test") } + val revisionRegex = "revision=(.*)".r val revisionMap: Map[String, Seq[URL]] = possibleRapidsJarURLs.map { url => val versionInfo = scala.io.Source.fromURL(url).getLines().toSeq - lazy val revision = versionInfo - .filter(_.startsWith("revision=")) - .map(_.split("=").last) + val revision = versionInfo + .collect { + case revisionRegex(revision) => revision + } .headOption .getOrElse("UNKNOWN") (revision, url) From 7d11a4ff004e53dac97819a387007b2497e2819e Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Fri, 24 Nov 2023 13:49:29 +0800 Subject: [PATCH 27/31] Do not print log if no multiple jar Signed-off-by: Haoyang Li --- .../src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index ad8ed841a8f..1941049def6 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -147,7 +147,9 @@ object RapidsPluginUtils extends Logging { conf.allowMultipleJars match { case AllowMultipleJars.ALWAYS => - logWarning(msg) + if (revisionMap.size != 1) { + logWarning(msg) + } case AllowMultipleJars.SAME_REVISION => require(revisionMap.size == 1, msg) case AllowMultipleJars.NEVER => From c4eef258b70fb67abdfdaec5a4e1c77087a0ade9 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Mon, 27 Nov 2023 12:29:29 +0800 Subject: [PATCH 28/31] ignore subdir when checking multiple jars Signed-off-by: Haoyang Li --- .../main/scala/com/nvidia/spark/rapids/Plugin.scala | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 1941049def6..9914c82c70d 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -115,8 +115,12 @@ object RapidsPluginUtils extends Logging { private def detectMultipleJar(propName: String, jarName: String, conf: RapidsConf): Unit = { val classloader = ShimLoader.getShimClassLoader() - val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet.toSeq.filterNot { - url => url.toString.contains("test") + val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet.toSeq.filter { + url => { + val urlPath = url.toString + // filter out submodule jars and files stored under subdirs of '!/' + !urlPath.contains("rapids-4-spark-") && urlPath.contains("!/" + propName) + } } val revisionRegex = "revision=(.*)".r val revisionMap: Map[String, Seq[URL]] = possibleRapidsJarURLs.map { url => @@ -147,11 +151,14 @@ object RapidsPluginUtils extends Logging { conf.allowMultipleJars match { case AllowMultipleJars.ALWAYS => - if (revisionMap.size != 1) { + if (revisionMap.size != 1 || revisionMap.values.exists(_.size != 1)) { logWarning(msg) } case AllowMultipleJars.SAME_REVISION => require(revisionMap.size == 1, msg) + if (revisionMap.values.exists(_.size != 1)) { + logWarning(msg) + } case AllowMultipleJars.NEVER => require(revisionMap.size == 1 && revisionMap.values.forall(_.size == 1), msg) } From bf4477a46fa6c02676e9c52e6a58927b796ad6b4 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 28 Nov 2023 09:51:23 +0800 Subject: [PATCH 29/31] Update sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala Co-authored-by: Gera Shegalov --- sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index 9914c82c70d..b75c64318fb 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -119,7 +119,7 @@ object RapidsPluginUtils extends Logging { url => { val urlPath = url.toString // filter out submodule jars and files stored under subdirs of '!/' - !urlPath.contains("rapids-4-spark-") && urlPath.contains("!/" + propName) + !urlPath.contains("rapids-4-spark-") && urlPath.endsWith("!/" + propName) } } val revisionRegex = "revision=(.*)".r From 39e0b56f21f80319034a33ba3b72e12b08fd1cb2 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Mon, 27 Nov 2023 19:31:11 +0800 Subject: [PATCH 30/31] wip ut --- .../spark/rapids/PluginUtilsSuite.scala | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/src/test/scala/com/nvidia/spark/rapids/PluginUtilsSuite.scala diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/PluginUtilsSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/PluginUtilsSuite.scala new file mode 100644 index 00000000000..9c7a542f37d --- /dev/null +++ b/tests/src/test/scala/com/nvidia/spark/rapids/PluginUtilsSuite.scala @@ -0,0 +1,44 @@ +package com.nvidia.spark.rapids + +import org.scalatest.funsuite.AnyFunSuite + +class PluginUtilsSuite extends AnyFunSuite { + + test("detectMultipleJar - multiple jars found") { + val conf = new RapidsConf() + val plugin = new Plugin() + + // Set up the classloader with multiple jar resources + val classloader = new TestClassLoader() + classloader.addResource("rapids-4-spark-1.0.jar") + classloader.addResource("rapids-4-spark-2.0.jar") + classloader.addResource("rapids-4-spark-3.0.jar") + classloader.addResource("other.jar") + classloader.addResource("subdir/rapids-4-spark-4.0.jar") + classloader.addResource("subdir/rapids-4-spark-5.0.jar") + classloader.addResource("subdir/other.jar") + ShimLoader.setShimClassLoader(classloader) + + // Call the method under test + plugin.detectMultipleJar("rapids-4-spark", "Rapids", conf) + + // Assert the expected log warning message + val expectedMsg = + """Multiple Rapids jars found in the classpath: + |revison: UNKNOWN + | jar URL: jar:file:/home/haoyangl/spark-rapids/sql-plugin/target/scala-2.12/test-classes/!/rapids-4-spark-1.0.jar + | UNKNOWN + | jar URL: jar:file:/home/haoyangl/spark-rapids/sql-plugin/target/scala-2.12/test-classes/!/rapids-4-spark-2.0.jar + | UNKNOWN + | jar URL: jar:file:/home/haoyangl/spark-rapids/sql-plugin/target/scala-2.12/test-classes/!/rapids-4-spark-3.0.jar + | UNKNOWN + | + |Please make sure there is only one Rapids jar in the classpath. + |If it is impossible to fix the classpath you can suppress the error by setting allow.multiple.jars to SAME_REVISION or ALWAYS. + |""".stripMargin + assert(plugin.logWarningMsg == expectedMsg) + } + + // Add more test cases here... + +} \ No newline at end of file From c3279f42631ad4c0945c205be8b07a7edd078894 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Tue, 28 Nov 2023 09:51:54 +0800 Subject: [PATCH 31/31] address comment Signed-off-by: Haoyang Li --- .../com/nvidia/spark/rapids/Plugin.scala | 6 ++- .../spark/rapids/PluginUtilsSuite.scala | 44 ------------------- 2 files changed, 5 insertions(+), 45 deletions(-) delete mode 100644 tests/src/test/scala/com/nvidia/spark/rapids/PluginUtilsSuite.scala diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index b75c64318fb..0983e72652d 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -118,7 +118,11 @@ object RapidsPluginUtils extends Logging { val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet.toSeq.filter { url => { val urlPath = url.toString - // filter out submodule jars and files stored under subdirs of '!/' + // Filter out submodule jars, e.g. rapids-4-spark-aggregator_2.12-23.12.0-spark341.jar, + // and files stored under subdirs of '!/', e.g. + // rapids-4-spark_2.12-23.12.0-cuda11.jar!/spark330/rapids4spark-version-info.properties + // We only want to find the main jar, e.g. + // rapids-4-spark_2.12-23.12.0-cuda11.jar!/rapids4spark-version-info.properties !urlPath.contains("rapids-4-spark-") && urlPath.endsWith("!/" + propName) } } diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/PluginUtilsSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/PluginUtilsSuite.scala deleted file mode 100644 index 9c7a542f37d..00000000000 --- a/tests/src/test/scala/com/nvidia/spark/rapids/PluginUtilsSuite.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.nvidia.spark.rapids - -import org.scalatest.funsuite.AnyFunSuite - -class PluginUtilsSuite extends AnyFunSuite { - - test("detectMultipleJar - multiple jars found") { - val conf = new RapidsConf() - val plugin = new Plugin() - - // Set up the classloader with multiple jar resources - val classloader = new TestClassLoader() - classloader.addResource("rapids-4-spark-1.0.jar") - classloader.addResource("rapids-4-spark-2.0.jar") - classloader.addResource("rapids-4-spark-3.0.jar") - classloader.addResource("other.jar") - classloader.addResource("subdir/rapids-4-spark-4.0.jar") - classloader.addResource("subdir/rapids-4-spark-5.0.jar") - classloader.addResource("subdir/other.jar") - ShimLoader.setShimClassLoader(classloader) - - // Call the method under test - plugin.detectMultipleJar("rapids-4-spark", "Rapids", conf) - - // Assert the expected log warning message - val expectedMsg = - """Multiple Rapids jars found in the classpath: - |revison: UNKNOWN - | jar URL: jar:file:/home/haoyangl/spark-rapids/sql-plugin/target/scala-2.12/test-classes/!/rapids-4-spark-1.0.jar - | UNKNOWN - | jar URL: jar:file:/home/haoyangl/spark-rapids/sql-plugin/target/scala-2.12/test-classes/!/rapids-4-spark-2.0.jar - | UNKNOWN - | jar URL: jar:file:/home/haoyangl/spark-rapids/sql-plugin/target/scala-2.12/test-classes/!/rapids-4-spark-3.0.jar - | UNKNOWN - | - |Please make sure there is only one Rapids jar in the classpath. - |If it is impossible to fix the classpath you can suppress the error by setting allow.multiple.jars to SAME_REVISION or ALWAYS. - |""".stripMargin - assert(plugin.logWarningMsg == expectedMsg) - } - - // Add more test cases here... - -} \ No newline at end of file