From 6a0587832062eb7060d582976740fd0b44607b89 Mon Sep 17 00:00:00 2001 From: rui-mo Date: Wed, 29 Jan 2025 16:38:48 -0800 Subject: [PATCH] fix: Fix withIntDistributionForField copy (#11644) Summary: For the decimal(8, 5) data build, Arrow reports the error below because a value is out of range. The cause is that in the copy call in 'withIntDistributionForField', the target index and source index are reversed. ``` C++ exception with description "Invalid: Invalid cast from Decimal128 to 4 byte integer" thrown in the test body. ``` Pull Request resolved: https://github.com/facebookincubator/velox/pull/11644 Reviewed By: Yuhta Differential Revision: D66560309 Pulled By: bikramSingh91 fbshipit-source-id: bd594f67a58b92915ea3bf24bee3a311db11eb04 --- velox/dwio/common/tests/utils/DataSetBuilder.h | 2 +- velox/dwio/parquet/tests/reader/E2EFilterTest.cpp | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/velox/dwio/common/tests/utils/DataSetBuilder.h b/velox/dwio/common/tests/utils/DataSetBuilder.h index 4893c28336f6..e0111be1d552 100644 --- a/velox/dwio/common/tests/utils/DataSetBuilder.h +++ b/velox/dwio/common/tests/utils/DataSetBuilder.h @@ -98,7 +98,7 @@ class DataSetBuilder { if (counter % 100 < repeats) { numbers->set(row, T(counter % repeats)); } else if (counter % 100 > 90 && row > 0) { - numbers->copy(numbers, row - 1, row, 1); + numbers->copy(numbers, row, row - 1, 1); } else { int64_t value; if (rareFrequency && counter % rareFrequency == 0) { diff --git a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp index 6cb2be529c41..455349d77ed6 100644 --- a/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp +++ b/velox/dwio/parquet/tests/reader/E2EFilterTest.cpp @@ -357,9 +357,11 @@ TEST_F(E2EFilterTest, floatAndDouble) { } TEST_F(E2EFilterTest, shortDecimalDictionary) { + // decimal(8, 5) maps to 4 bytes FLBA in Parquet. // decimal(10, 5) maps to 5 bytes FLBA in Parquet. // decimal(17, 5) maps to 8 bytes FLBA in Parquet. 
for (const auto& type : { + "shortdecimal_val:decimal(8, 5)", "shortdecimal_val:decimal(10, 5)", "shortdecimal_val:decimal(17, 5)", }) { @@ -386,9 +388,11 @@ TEST_F(E2EFilterTest, shortDecimalDirect) { options_.enableDictionary = false; options_.dataPageSize = 4 * 1024; + // decimal(8, 5) maps to 4 bytes FLBA in Parquet. // decimal(10, 5) maps to 5 bytes FLBA in Parquet. // decimal(17, 5) maps to 8 bytes FLBA in Parquet. for (const auto& type : { + "shortdecimal_val:decimal(8, 5)", "shortdecimal_val:decimal(10, 5)", "shortdecimal_val:decimal(17, 5)", }) {