apache · szehon-ho · Dec 2, 2024 · Oct 31, 2024 · Oct 31, 2024 · Nov 4, 2024
diff --git a/...ions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java b/...ions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java
@@ -69,6 +69,60 @@ public void removeTable() {
     sql("DROP TABLE IF EXISTS %s", tableName(QUOTED_SPECIAL_CHARS_TABLE_NAME));
   }
 
+  @TestTemplate
+  public void testFailsByCaseSensitiveWhereSql() {
+    createTable();
+    insertData(10);
+    sql("set spark.sql.caseSensitive=true");
+    assertThatThrownBy(
+            () ->
+                sql(
+                    "CALL %s.system.rewrite_data_files(table=>'%s', where=>'C1 > 0')",
+                    catalogName, tableIdent))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Cannot parse predicates in where option: C1 > 0");
+  }
+
+  @TestTemplate
+  public void testSucceedByCaseInsensitiveWhereSql() {
+    createTable();
+    sql("set spark.sql.caseSensitive=false");
+    assertEquals(
+        "Should have done nothing but passed the schema validation, since no files are present",
+        ImmutableList.of(row(0, 0, 0L, 0)),
+        sql(
+            "CALL %s.system.rewrite_data_files(table=>'%s', where=>'C1 > 0')",
+            catalogName, tableIdent));
+  }
+
+  @TestTemplate
+  public void testFailsByCaseSensitiveWhereSqlOnPartitionTable() {
+    createTruncatePartitionTable();
+    insertData(10);
+    sql("set spark.sql.caseSensitive=true");
+    assertThatThrownBy(
+            () ->
+                sql(
+                    "CALL %s.system.rewrite_data_files("
+                        + "table=>'%s', where=>\"C2 > 'a' and substr('110111',1,3)='110'\")",
+                    catalogName, tableIdent))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage(
+            "Cannot parse predicates in where option: C2 > 'a' and substr('110111',1,3)='110'");
+  }
+
+  @TestTemplate
+  public void testSucceedByCaseInsensitiveWhereSqlOnPartitionTable() {
+    createTruncatePartitionTable();
+    sql("set spark.sql.caseSensitive=false");
+    assertEquals(
+        "Should have done nothing but passed the schema validation, since no files are present",
+        ImmutableList.of(row(0, 0, 0L, 0)),
+        sql(
+            "CALL %s.system.rewrite_data_files(table=>'%s', where=>\"C2 > 'a'\")",
+            catalogName, tableIdent));
+  }
+
   @TestTemplate
   public void testZOrderSortExpression() {
     List<ExtendedParser.RawOrderField> order =
@@ -923,6 +977,17 @@ private void createBucketPartitionTable() {
         TableProperties.WRITE_DISTRIBUTION_MODE_NONE);
   }
 
+  private void createTruncatePartitionTable() {
+    // Three-column Iceberg table partitioned by truncate(1, c2); the only table property set is
+    // the write distribution mode, using the WRITE_DISTRIBUTION_MODE_NONE constant.
+    String ddl =
+        "CREATE TABLE %s (c1 int, c2 string, c3 string) USING iceberg "
+            + "PARTITIONED BY (truncate(1, c2)) TBLPROPERTIES ('%s' '%s')";
+    sql(
+        ddl,
+        tableName,
+        TableProperties.WRITE_DISTRIBUTION_MODE,
+        TableProperties.WRITE_DISTRIBUTION_MODE_NONE);
+  }
+
   private void insertData(int filesCount) {
+    // Convenience overload: forwards to the two-argument insertData using this test's tableName.
     insertData(tableName, filesCount);
   }

diff --git a/...3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java b/...3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java
@@ -102,11 +102,13 @@ public class RewriteDataFilesSparkAction
   private boolean useStartingSequenceNumber;
   private RewriteJobOrder rewriteJobOrder;
   private FileRewriter<FileScanTask, DataFile> rewriter = null;
+  private boolean caseSensitive;
 
   RewriteDataFilesSparkAction(SparkSession spark, Table table) {
     super(spark.cloneSession());
     // Disable Adaptive Query Execution as this may change the output partitioning of our write
     spark().conf().set(SQLConf.ADAPTIVE_EXECUTION_ENABLED().key(), false);
+    // Resolve case sensitivity through the public string-keyed accessor so that an unset key
+    // falls back to Spark's own registered default instead of a hard-coded value, and so that
+    // no unchecked cast from the ConfigEntry-based accessor is needed.
+    this.caseSensitive = Boolean.parseBoolean(spark().conf().get(SQLConf.CASE_SENSITIVE().key()));
     this.table = table;
   }
 
@@ -198,6 +200,7 @@ StructLikeMap<List<List<FileScanTask>>> planFileGroups(long startingSnapshotId)
         table
             .newScan()
             .useSnapshot(startingSnapshotId)
+            .caseSensitive(caseSensitive)
             .filter(filter)
             .ignoreResiduals()
             .planFiles();