apache · viirya · Aug 5, 2024 · Aug 4, 2024
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -2936,7 +2936,9 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde with CometExprShim
           _: DateType | _: BooleanType =>
         true
       case StructType(fields) =>
-        fields.forall(f => supportedDataType(f.dataType))
+        fields.forall(f => supportedDataType(f.dataType)) &&
+        // Java Arrow stream reader cannot work on duplicate field name
+        fields.map(f => f.name).distinct.length == fields.length
       case ArrayType(ArrayType(_, _), _) => false // TODO: nested array is not supported
       case ArrayType(MapType(_, _, _), _) => false // TODO: map array element is not supported
       case ArrayType(elementType, _) =>

diff --git a/spark/src/test/scala/org/apache/comet/exec/CometColumnarShuffleSuite.scala b/spark/src/test/scala/org/apache/comet/exec/CometColumnarShuffleSuite.scala
@@ -68,6 +68,16 @@ abstract class CometColumnarShuffleSuite extends CometTestBase with AdaptiveSpar
 
   setupTestData()
 
+  test("Fallback to Spark when shuffling on struct with duplicate field name") {
+    val df = sql("""
+        | SELECT max(struct(a, record.*, b)) as r FROM
+        |   (select a as a, b as b, struct(a,b) as record from testData2) tmp
+        | GROUP BY a
+      """.stripMargin).select($"r.*")
+
+    checkSparkAnswer(df)
+  }
+
   test("Unsupported types for SinglePartition should fallback to Spark") {
     checkSparkAnswer(spark.sql("""
         |SELECT