From ea1acfc2409d3bb0047780012dc00dcb7d44b92f Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 10 Jan 2024 19:45:22 +0000 Subject: [PATCH 1/4] fix regression against Spark 3.2.x --- .../com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala index 0c94c5c1e1f..eae06eb0220 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala @@ -82,8 +82,7 @@ object GpuJsonToStructsShim { def tagTimestampFormatSupport(meta: RapidsMeta[_, _, _], timestampFormat: Option[String]): Unit = { - // we only support the case where no format is specified - timestampFormat.foreach(f => meta.willNotWorkOnGpu(s"Unsupported timestampFormat: $f")) + // timestampFormat is ignored } def castJsonStringToTimestamp(input: ColumnVector, From 93a8252abff6b2ec043c313e6171596e997e1c43 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 10 Jan 2024 12:47:49 -0700 Subject: [PATCH 2/4] add comments --- .../com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala index eae06eb0220..eb48f1616cc 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala @@ -56,6 +56,8 @@ object GpuJsonToStructsShim { } def tagDateFormatSupportFromScan(meta: RapidsMeta[_, _, _], dateFormat: Option[String]): Unit = { + // dateFormat is ignored by JsonToStructs in Spark 3.2.x and 3.3.x because it just + // performs a regular cast from string to date } def castJsonStringToDateFromScan(input: ColumnVector, dt: DType, @@ -82,7 +84,8 @@ object GpuJsonToStructsShim { def tagTimestampFormatSupport(meta: RapidsMeta[_, _, _], timestampFormat: Option[String]): Unit = { - // timestampFormat is ignored + // timestampFormat is ignored by JsonToStructs in Spark 3.2.x and 3.3.x because it just + // performs a regular cast from string to timestamp } def castJsonStringToTimestamp(input: ColumnVector, From a453c804937c4296bd66f58dc070d116b6877184 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 10 Jan 2024 12:49:23 -0700 Subject: [PATCH 3/4] Revert a change --- .../com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala index eb48f1616cc..bce2ddfa66c 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuJsonToStructsShim.scala @@ -47,7 +47,7 @@ object GpuJsonToStructsShim { } def castJsonStringToDate(input: ColumnVector, options: Map[String, String]): ColumnVector = { - // dateFormat is ignored in from_json in Spark 3.2 + // dateFormat is ignored in from_json in Spark 3.2.x and 3.3.x withResource(Scalar.fromString(" ")) { space => withResource(input.strip(space)) { trimmed => GpuCast.castStringToDate(trimmed) @@ -56,8 +56,6 @@ object GpuJsonToStructsShim { } def tagDateFormatSupportFromScan(meta: RapidsMeta[_, _, _], dateFormat: Option[String]): Unit = { - // dateFormat is ignored by JsonToStructs in Spark 3.2.x and 3.3.x because it just - // performs a regular cast from string to date } def castJsonStringToDateFromScan(input: ColumnVector, dt: DType, From ce3b264d0d920fa36579b3c98214ffd7ff31e9f6 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 10 Jan 2024 13:05:21 -0700 Subject: [PATCH 4/4] signoff Signed-off-by: Andy Grove