-
Notifications
You must be signed in to change notification settings - Fork 326
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Snowflake Dialect pt. 4 - reading a column of small integers as Integ…
…er type, other type mapping tests (#10518) - Related to #9486 - Ensures that even though an integer column in Snowflake is represented by `Decimal` type, if the values are small enough, they are materialized as `Integer`. - If the values are larger, they are still read in as `Decimal`. - Adds tests for some other `Decimal` edge cases (various precisions and scales), and for `Float`.
- Loading branch information
Showing
5 changed files
with
217 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
158 changes: 158 additions & 0 deletions
158
std-bits/snowflake/src/main/java/org/enso/snowflake/SnowflakeIntegerColumnMaterializer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
package org.enso.snowflake; | ||
|
||
import java.math.BigInteger; | ||
import java.util.Arrays; | ||
import java.util.BitSet; | ||
import org.enso.table.data.column.builder.Builder; | ||
import org.enso.table.data.column.storage.Storage; | ||
import org.enso.table.data.column.storage.numeric.BigIntegerStorage; | ||
import org.enso.table.data.column.storage.numeric.LongStorage; | ||
import org.enso.table.data.column.storage.type.BigIntegerType; | ||
import org.enso.table.data.column.storage.type.IntegerType; | ||
import org.enso.table.data.column.storage.type.StorageType; | ||
import org.enso.table.error.ValueTypeMismatchException; | ||
import org.graalvm.polyglot.Context; | ||
|
||
public class SnowflakeIntegerColumnMaterializer extends Builder { | ||
private static final BigInteger LONG_MIN = BigInteger.valueOf(Long.MIN_VALUE); | ||
private static final BigInteger LONG_MAX = BigInteger.valueOf(Long.MAX_VALUE); | ||
// We start in integer mode and will switch to BigInteger mode if we encounter a value that | ||
// exceeds the range | ||
private long[] ints; | ||
private BitSet intsMissing; | ||
private BigInteger[] bigInts; | ||
private int currentSize; | ||
private Mode mode; | ||
|
||
public SnowflakeIntegerColumnMaterializer(int initialCapacity) { | ||
ints = new long[initialCapacity]; | ||
intsMissing = new BitSet(); | ||
bigInts = null; | ||
currentSize = 0; | ||
mode = Mode.LONG; | ||
} | ||
|
||
private void retypeToBigIntegers() { | ||
assert mode == Mode.LONG; | ||
Context context = Context.getCurrent(); | ||
bigInts = new BigInteger[ints.length]; | ||
for (int i = 0; i < currentSize; i++) { | ||
if (intsMissing.get(i)) { | ||
bigInts[i] = null; | ||
} else { | ||
bigInts[i] = BigInteger.valueOf(ints[i]); | ||
} | ||
|
||
context.safepoint(); | ||
} | ||
|
||
ints = null; | ||
intsMissing = null; | ||
mode = Mode.BIG_INTEGER; | ||
} | ||
|
||
private boolean fitsInLong(BigInteger bigInteger) { | ||
return bigInteger.compareTo(LONG_MIN) >= 0 && bigInteger.compareTo(LONG_MAX) <= 0; | ||
} | ||
|
||
@Override | ||
public void appendNoGrow(Object o) { | ||
switch (o) { | ||
case BigInteger bigInteger -> { | ||
switch (mode) { | ||
case BIG_INTEGER -> bigInts[currentSize++] = bigInteger; | ||
|
||
case LONG -> { | ||
if (fitsInLong(bigInteger)) { | ||
ints[currentSize++] = bigInteger.longValue(); | ||
} else { | ||
retypeToBigIntegers(); | ||
bigInts[currentSize++] = bigInteger; | ||
} | ||
} | ||
} | ||
} | ||
|
||
case null -> appendNulls(1); | ||
default -> throw new ValueTypeMismatchException(BigIntegerType.INSTANCE, o); | ||
} | ||
} | ||
|
||
@Override | ||
public void append(Object o) { | ||
if (currentSize >= capacity()) { | ||
grow(); | ||
} | ||
|
||
appendNoGrow(o); | ||
} | ||
|
||
@Override | ||
public void appendNulls(int count) { | ||
if (mode == Mode.LONG) { | ||
intsMissing.set(currentSize, currentSize + count); | ||
} | ||
|
||
currentSize += count; | ||
} | ||
|
||
@Override | ||
public void appendBulkStorage(Storage<?> storage) { | ||
throw new IllegalStateException( | ||
"SnowflakeIntegerColumnMaterializer.appendBulkStorage: Not supported."); | ||
} | ||
|
||
@Override | ||
public int getCurrentSize() { | ||
return currentSize; | ||
} | ||
|
||
@Override | ||
public Storage<?> seal() { | ||
resize(currentSize); | ||
return switch (mode) { | ||
case LONG -> new LongStorage(ints, currentSize, intsMissing, IntegerType.INT_64); | ||
case BIG_INTEGER -> new BigIntegerStorage(bigInts, currentSize); | ||
}; | ||
} | ||
|
||
@Override | ||
public StorageType getType() { | ||
// The type of the builder can change over time, so we do not report any stable type here. | ||
// Same as in InferredBuilder. | ||
return null; | ||
} | ||
|
||
private int capacity() { | ||
return mode == Mode.LONG ? ints.length : bigInts.length; | ||
} | ||
|
||
private void grow() { | ||
int desiredCapacity = 3; | ||
if (capacity() > 1) { | ||
desiredCapacity = (capacity() * 3 / 2); | ||
} | ||
|
||
// It is possible for the `currentSize` to grow arbitrarily larger than | ||
// the capacity, because when nulls are being added the array is not | ||
// resized, only the counter is incremented. Thus, we need to ensure | ||
// that we have allocated enough space for at least one element. | ||
if (currentSize >= desiredCapacity) { | ||
desiredCapacity = currentSize + 1; | ||
} | ||
|
||
resize(desiredCapacity); | ||
} | ||
|
||
private void resize(int desiredCapacity) { | ||
switch (mode) { | ||
case LONG -> ints = Arrays.copyOf(ints, desiredCapacity); | ||
case BIG_INTEGER -> bigInts = Arrays.copyOf(bigInts, desiredCapacity); | ||
} | ||
} | ||
|
||
private enum Mode { | ||
LONG, | ||
BIG_INTEGER | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters