Skip to content

Commit

Permalink
ORC-1683: Fix instanceof of BinaryStatisticsImpl merge method
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
This PR aims to fix `instanceof` of `BinaryStatisticsImpl` merge method.

### Why are the changes needed?
In [ORC-1542](https://issues.apache.org/jira/browse/ORC-1542), we modified part of instanceof, but `BinaryStatisticsImpl` was not modified because the merge method was written differently.

However, the current code is instanceof `BinaryColumnStatistics` and then explicitly cast `BinaryStatisticsImpl`, so it should be replaced by the new instanceof writing method (`Pattern Matching for instanceof`).

```java
if (other instanceof BinaryColumnStatistics) {
        BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
```

### How was this patch tested?
GA

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #1883 from cxzl25/ORC-1683.

Authored-by: sychen <[email protected]>
Signed-off-by: Shaoyun Chen <[email protected]>
  • Loading branch information
cxzl25 committed Aug 23, 2024
1 parent 2693330 commit abab96d
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 2 deletions.
23 changes: 21 additions & 2 deletions java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ public void updateBoolean(boolean value, int repetitions) {
public void merge(ColumnStatisticsImpl other) {
if (other instanceof BooleanStatisticsImpl bkt) {
trueCount += bkt.trueCount;
} else if (!(other instanceof BooleanColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
} else {
if (isStatsExists() && trueCount != 0) {
throw new IllegalArgumentException("Incompatible merging of boolean column statistics");
Expand Down Expand Up @@ -222,6 +224,8 @@ public void merge(ColumnStatisticsImpl other) {
}
}
sum += otherColl.sum;
} else if (!(other instanceof CollectionColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of collection column statistics");
} else {
if (isStatsExists()) {
throw new IllegalArgumentException("Incompatible merging of collection column statistics");
Expand Down Expand Up @@ -397,6 +401,8 @@ public void merge(ColumnStatisticsImpl other) {
overflow = true;
}
}
} else if (!(other instanceof IntegerColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of integer column statistics");
} else {
if (isStatsExists() && hasMinimum) {
throw new IllegalArgumentException("Incompatible merging of integer column statistics");
Expand Down Expand Up @@ -560,6 +566,8 @@ public void merge(ColumnStatisticsImpl other) {
}
}
sum += dbl.sum;
} else if (!(other instanceof DoubleColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of double column statistics");
} else {
if (isStatsExists() && hasMinimum) {
throw new IllegalArgumentException("Incompatible merging of double column statistics");
Expand Down Expand Up @@ -763,6 +771,8 @@ public void merge(ColumnStatisticsImpl other) {
}
}
sum += str.sum;
} else if (!(other instanceof StringColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of string column statistics");
} else {
if (isStatsExists()) {
throw new IllegalArgumentException("Incompatible merging of string column statistics");
Expand Down Expand Up @@ -993,9 +1003,10 @@ public void updateBinary(byte[] bytes, int offset, int length,

@Override
public void merge(ColumnStatisticsImpl other) {
if (other instanceof BinaryColumnStatistics) {
BinaryStatisticsImpl bin = (BinaryStatisticsImpl) other;
if (other instanceof BinaryStatisticsImpl bin) {
sum += bin.sum;
} else if (!(other instanceof BinaryColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of binary column statistics");
} else {
if (isStatsExists() && sum != 0) {
throw new IllegalArgumentException("Incompatible merging of binary column statistics");
Expand Down Expand Up @@ -1128,6 +1139,8 @@ public void merge(ColumnStatisticsImpl other) {
sum.mutateAdd(dec.sum);
}
}
} else if (!(other instanceof DecimalColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
} else {
if (isStatsExists() && minimum != null) {
throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
Expand Down Expand Up @@ -1321,6 +1334,8 @@ public void merge(ColumnStatisticsImpl other) {
hasSum = false;
}
}
} else if (!(other instanceof DecimalColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
} else {
if (other.getNumberOfValues() != 0) {
throw new IllegalArgumentException("Incompatible merging of decimal column statistics");
Expand Down Expand Up @@ -1486,6 +1501,8 @@ public void merge(ColumnStatisticsImpl other) {
if (other instanceof DateStatisticsImpl dateStats) {
minimum = Math.min(minimum, dateStats.minimum);
maximum = Math.max(maximum, dateStats.maximum);
} else if (!(other instanceof DateColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of date column statistics");
} else {
if (isStatsExists() && count != 0) {
throw new IllegalArgumentException("Incompatible merging of date column statistics");
Expand Down Expand Up @@ -1698,6 +1715,8 @@ public void merge(ColumnStatisticsImpl other) {
maximum = timestampStats.maximum;
}
}
} else if (!(other instanceof TimestampColumnStatistics)) {
throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
} else {
if (isStatsExists() && count != 0) {
throw new IllegalArgumentException("Incompatible merging of timestamp column statistics");
Expand Down
43 changes: 43 additions & 0 deletions java/core/src/test/org/apache/orc/TestColumnStatistics.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.orc.impl.ColumnStatisticsImpl;
import org.junit.jupiter.api.BeforeEach;
Expand All @@ -44,6 +45,7 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
Expand Down Expand Up @@ -699,6 +701,47 @@ public void testDecimalMinMaxStatistics() throws Exception {
"Incorrect minimum value");
}

@Test
public void testBinaryMerge() {
TypeDescription schema = TypeDescription.createBinary();

ColumnStatisticsImpl stats1 = ColumnStatisticsImpl.create(schema);
ColumnStatisticsImpl stats2 = ColumnStatisticsImpl.create(schema);
stats1.increment(3);
stats1.updateBinary(new BytesWritable("bob".getBytes(StandardCharsets.UTF_8)));
stats1.updateBinary(new BytesWritable("david".getBytes(StandardCharsets.UTF_8)));
stats1.updateBinary(new BytesWritable("charles".getBytes(StandardCharsets.UTF_8)));
stats2.increment(2);
stats2.updateBinary(new BytesWritable("anne".getBytes(StandardCharsets.UTF_8)));
stats2.updateBinary(new BytesWritable("abcdef".getBytes(StandardCharsets.UTF_8)));

assertEquals(15, ((BinaryColumnStatistics) stats1).getSum());
assertEquals(10, ((BinaryColumnStatistics) stats2).getSum());

stats1.merge(stats2);

assertEquals(25, ((BinaryColumnStatistics) stats1).getSum());
}

@Test
public void testMergeIncompatible() {
TypeDescription stringSchema = TypeDescription.createString();
ColumnStatisticsImpl stringStats = ColumnStatisticsImpl.create(stringSchema);

TypeDescription doubleSchema = TypeDescription.createDouble();
ColumnStatisticsImpl doubleStats = ColumnStatisticsImpl.create(doubleSchema);

stringStats.increment(3);
stringStats.updateString(new Text("bob"));
stringStats.updateString(new Text("david"));
stringStats.updateString(new Text("charles"));

assertThrows(IllegalArgumentException.class, () -> {
doubleStats.merge(stringStats);
});

assertEquals(0, ((DoubleColumnStatistics) doubleStats).getNumberOfValues());
}

Path workDir = new Path(System.getProperty("test.tmp.dir",
"target" + File.separator + "test" + File.separator + "tmp"));
Expand Down

0 comments on commit abab96d

Please sign in to comment.