From c1b7c7d2a8e2cf2802ad3846ece3e38c98def99c Mon Sep 17 00:00:00 2001 From: KAZUYUKI TANIMURA Date: Tue, 23 Jul 2024 17:49:25 -0700 Subject: [PATCH] fix: dictionary decimal vector optimization (#705) ## Which issue does this PR close? Part of #679 and #670 Related #490 ## Rationale for this change For dictionary decimal vectors, it was unpacking even for Int and Long decimals that used more memory than necessary. ## What changes are included in this PR? Unpack only for Decimal 128 ## How are these changes tested? Existing test --- .../apache/comet/vector/CometDictionary.java | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/common/src/main/java/org/apache/comet/vector/CometDictionary.java b/common/src/main/java/org/apache/comet/vector/CometDictionary.java index 24a6d6d8c..fd5c2410b 100644 --- a/common/src/main/java/org/apache/comet/vector/CometDictionary.java +++ b/common/src/main/java/org/apache/comet/vector/CometDictionary.java @@ -35,7 +35,6 @@ public class CometDictionary implements AutoCloseable { public CometDictionary(CometPlainVector values) { this.values = values; this.numValues = values.numValues(); - initialize(); } public void setDictionaryVector(CometPlainVector values) { @@ -83,6 +82,19 @@ public byte[] decodeToBinary(int index) { case FIXEDSIZEBINARY: return values.getBinary(index); case DECIMAL: + if (binaries == null) { + // We only need to copy values for decimal 128 type as random access + // to the dictionary is not efficient for decimal (it needs to copy + // the value to a new byte array everytime). + ByteArrayWrapper[] binaries = new ByteArrayWrapper[numValues]; + for (int i = 0; i < numValues; i++) { + // Need copying here since we re-use byte array for decimal + byte[] bytes = new byte[DECIMAL_BYTE_WIDTH]; + bytes = values.copyBinaryDecimal(i, bytes); + binaries[i] = new ByteArrayWrapper(bytes); + } + this.binaries = binaries; + } return binaries[index].bytes; default: throw new IllegalArgumentException( @@ -99,23 +111,6 @@ public void close() { values.close(); } - private void initialize() { - switch (values.getValueVector().getMinorType()) { - case DECIMAL: - // We only need to copy values for decimal type as random access - // to the dictionary is not efficient for decimal (it needs to copy - // the value to a new byte array everytime). - binaries = new ByteArrayWrapper[numValues]; - for (int i = 0; i < numValues; i++) { - // Need copying here since we re-use byte array for decimal - byte[] bytes = new byte[DECIMAL_BYTE_WIDTH]; - bytes = values.copyBinaryDecimal(i, bytes); - binaries[i] = new ByteArrayWrapper(bytes); - } - break; - } - } - private static class ByteArrayWrapper { private final byte[] bytes;