From d9b7a98b7adb17dc821eafea7adefd1f99978130 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 29 Jan 2025 16:59:17 -0500 Subject: [PATCH] Avoid converting Decimal32/Decimal64 in `to_arrow` and `from_arrow` APIs (#17422) Now that the Arrow format includes `Decimal32` and `Decimal64` data types, CUDF no longer needs to convert them to decimal128 when importing/exporting values via the `to_arrow` and `from_arrow` APIs. Instead we can just treat them like any other fixed-width data type and use the buffers directly. This doesn't fully address https://github.com/rapidsai/cudf/issues/17080 as it doesn't make any changes to the Parquet side of things This also incorporates the changes from https://github.com/rapidsai/cudf/pull/17405 which are needed for debug tests. That should get merged first, and then I can rebase this. Authors: - Matt Topol (https://github.com/zeroshade) - David Wendt (https://github.com/davidwendt) - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) Approvers: - Paul Mattione (https://github.com/pmattione-nvidia) - Bradley Dice (https://github.com/bdice) - Lawrence Mitchell (https://github.com/wence-) - GALI PREM SAGAR (https://github.com/galipremsagar) - Robert (Bobby) Evans (https://github.com/revans2) - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17422 --- conda/recipes/cudf/meta.yaml | 2 +- conda/recipes/pylibcudf/meta.yaml | 2 +- cpp/cmake/thirdparty/get_arrow.cmake | 4 +- cpp/cmake/thirdparty/get_nanoarrow.cmake | 6 +- .../patches/nanoarrow_override.json | 18 ++ cpp/src/interop/arrow_utilities.cpp | 6 +- cpp/src/interop/arrow_utilities.hpp | 18 +- cpp/src/interop/from_arrow_device.cu | 9 +- cpp/src/interop/from_arrow_host.cu | 11 +- cpp/src/interop/to_arrow_device.cu | 102 +------- cpp/src/interop/to_arrow_host.cu | 33 +-- cpp/src/interop/to_arrow_schema.cpp | 21 +- cpp/tests/interop/arrow_utils.hpp | 50 ++-- cpp/tests/interop/from_arrow_device_test.cpp | 81 ++++--- cpp/tests/interop/from_arrow_host_test.cpp | 222 +++++++++++++++++- cpp/tests/interop/from_arrow_test.cpp | 100 +++++--- cpp/tests/interop/nanoarrow_utils.hpp | 34 ++- cpp/tests/interop/to_arrow_device_test.cpp | 66 +----- cpp/tests/interop/to_arrow_host_test.cpp | 38 +-- cpp/tests/interop/to_arrow_test.cpp | 98 ++++---- .../test/java/ai/rapids/cudf/TableTest.java | 6 + python/cudf/cudf/tests/test_reductions.py | 163 +++++++++++-- python/cudf/cudf/tests/test_scalar.py | 62 ++++- 23 files changed, 735 insertions(+), 417 deletions(-) create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_override.json diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 5df7f97346a..83651c7972b 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -81,7 +81,7 @@ requirements: - numba-cuda >=0.2.0,<0.3.0a0 - numba >=0.59.1,<0.61.0a0 - numpy >=1.23,<3.0a0 - - pyarrow>=14.0.0,<18.0.0a0 + - pyarrow>=14.0.0,<20.0.0a0 - libcudf ={{ version }} - pylibcudf ={{ version }} - {{ pin_compatible('rmm', max_pin='x.x') }} diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml index 2ed52955f78..35e9108e9d8 100644 --- a/conda/recipes/pylibcudf/meta.yaml +++ b/conda/recipes/pylibcudf/meta.yaml @@ -77,7 +77,7 @@ requirements: - typing_extensions >=4.0.0 - pandas >=2.0,<2.2.4dev0 - numpy >=1.23,<3.0a0 - - pyarrow>=14.0.0,<18.0.0a0 + - pyarrow>=14.0.0,<20.0.0a0 - {{ pin_compatible('rmm', max_pin='x.x') }} - fsspec >=0.6.0 {% if cuda_major == "11" %} diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 07cbf5150f4..c519fa687c3 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -347,7 +347,7 @@ if(NOT DEFINED CUDF_VERSION_Arrow) set(CUDF_VERSION_Arrow # This version must be kept in sync with the libarrow version pinned for builds in # dependencies.yaml. - 16.1.0 + 19.0.0 CACHE STRING "The version of Arrow to find (or build)" ) endif() diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake index b0c48e04710..6765202cc5e 100644 --- a/cpp/cmake/thirdparty/get_nanoarrow.cmake +++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -23,11 +23,11 @@ function(find_and_configure_nanoarrow) # Currently we need to always build nanoarrow so we don't pickup a previous installed version set(CPM_DOWNLOAD_nanoarrow ON) rapids_cpm_find( - nanoarrow 0.6.0.dev + nanoarrow 0.7.0.dev GLOBAL_TARGETS nanoarrow CPM_ARGS GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git - GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb + GIT_TAG 4bf5a9322626e95e3717e43de7616c0a256179eb GIT_SHALLOW FALSE OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" ${_exclude_from_all} ) diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_override.json b/cpp/cmake/thirdparty/patches/nanoarrow_override.json new file mode 100644 index 00000000000..7dd4312a674 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nanoarrow_override.json @@ -0,0 +1,18 @@ + +{ + "packages" : { + "nanoarrow" : { + "version" : "0.7.0.dev", + "git_url" : "https://github.com/apache/arrow-nanoarrow.git", + "git_tag" : "4bf5a9322626e95e3717e43de7616c0a256179eb", + "git_shallow" : false, + "patches" : [ + { + "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff", + "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537", + "fixed_in" : "" + } + ] + } + } +} diff --git a/cpp/src/interop/arrow_utilities.cpp b/cpp/src/interop/arrow_utilities.cpp index c69ebe12d2c..1e137b8cbed 100644 --- a/cpp/src/interop/arrow_utilities.cpp +++ b/cpp/src/interop/arrow_utilities.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -63,6 +63,8 @@ data_type arrow_to_cudf_type(ArrowSchemaView const* arrow_view) default: CUDF_FAIL("Unsupported duration unit in arrow", cudf::data_type_error); } } + case NANOARROW_TYPE_DECIMAL32: return data_type{type_id::DECIMAL32, -arrow_view->decimal_scale}; + case NANOARROW_TYPE_DECIMAL64: return data_type{type_id::DECIMAL64, -arrow_view->decimal_scale}; case NANOARROW_TYPE_DECIMAL128: return data_type{type_id::DECIMAL128, -arrow_view->decimal_scale}; default: CUDF_FAIL("Unsupported type_id conversion to cudf", cudf::data_type_error); @@ -84,6 +86,8 @@ ArrowType id_to_arrow_type(cudf::type_id id) case cudf::type_id::FLOAT32: return NANOARROW_TYPE_FLOAT; case cudf::type_id::FLOAT64: return NANOARROW_TYPE_DOUBLE; case cudf::type_id::TIMESTAMP_DAYS: return NANOARROW_TYPE_DATE32; + case cudf::type_id::DECIMAL32: return NANOARROW_TYPE_DECIMAL32; + case cudf::type_id::DECIMAL64: return NANOARROW_TYPE_DECIMAL64; case cudf::type_id::DECIMAL128: return NANOARROW_TYPE_DECIMAL128; default: CUDF_FAIL("Unsupported type_id conversion to arrow type", cudf::data_type_error); } diff --git a/cpp/src/interop/arrow_utilities.hpp b/cpp/src/interop/arrow_utilities.hpp index e4bdedf6603..522f8915049 100644 --- a/cpp/src/interop/arrow_utilities.hpp +++ b/cpp/src/interop/arrow_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,21 +70,5 @@ ArrowType id_to_arrow_storage_type(cudf::type_id id); */ int initialize_array(ArrowArray* arr, ArrowType storage_type, cudf::column_view column); -/** - * @brief Helper to convert decimal values to 128-bit versions for Arrow compatibility - * - * The template parameter should be the underlying type of the data (e.g. int32_t for - * 32-bit decimal and int64_t for 64-bit decimal). - * - * @param input column_view of the data - * @param stream cuda stream to perform the operations on - * @param mr memory resource to allocate the returned device_uvector with - * @return unique_ptr to a device_buffer containing the upcasted data - */ -template -std::unique_ptr decimals_to_arrow(cudf::column_view input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - } // namespace detail } // namespace cudf diff --git a/cpp/src/interop/from_arrow_device.cu b/cpp/src/interop/from_arrow_device.cu index cb3c4c55a61..29c4dfd35ac 100644 --- a/cpp/src/interop/from_arrow_device.cu +++ b/cpp/src/interop/from_arrow_device.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,9 +49,7 @@ namespace { using dispatch_tuple_t = std::tuple; struct dispatch_from_arrow_device { - template () && - !std::is_same_v)> + template () && !is_fixed_point())> dispatch_tuple_t operator()(ArrowSchemaView*, ArrowArray const*, data_type, @@ -62,8 +60,7 @@ struct dispatch_from_arrow_device { CUDF_FAIL("Unsupported type in from_arrow_device", cudf::data_type_error); } - template () || std::is_same_v)> + template () || is_fixed_point())> dispatch_tuple_t operator()(ArrowSchemaView* schema, ArrowArray const* input, data_type type, diff --git a/cpp/src/interop/from_arrow_host.cu b/cpp/src/interop/from_arrow_host.cu index b5d2427e288..ea5487a2960 100644 --- a/cpp/src/interop/from_arrow_host.cu +++ b/cpp/src/interop/from_arrow_host.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,22 +69,19 @@ struct dispatch_copy_from_arrow_host { return mask; } - template () && - !std::is_same_v)> + template () && !is_fixed_point())> std::unique_ptr operator()(ArrowSchemaView*, ArrowArray const*, data_type, bool) { CUDF_FAIL("Unsupported type in copy_from_arrow_host."); } - template () || std::is_same_v)> + template () || is_fixed_point())> std::unique_ptr operator()(ArrowSchemaView* schema, ArrowArray const* input, data_type type, bool skip_mask) { - using DeviceType = std::conditional_t, __int128_t, T>; + using DeviceType = device_storage_type_t; size_type const num_rows = input->length; size_type const offset = input->offset; diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu index fc1b0226a48..17eff1128f6 100644 --- a/cpp/src/interop/to_arrow_device.cu +++ b/cpp/src/interop/to_arrow_device.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -92,13 +92,15 @@ int set_buffer(std::unique_ptr device_buf, int64_t i, ArrowArray* out) } struct dispatch_to_arrow_device { - template ())> + template () and not is_fixed_point())> int operator()(cudf::column&&, rmm::cuda_stream_view, rmm::device_async_resource_ref, ArrowArray*) { CUDF_FAIL("Unsupported type for to_arrow_device", cudf::data_type_error); } - template ())> + // cover rep layout compatible and decimal types + template () or is_fixed_point())> int operator()(cudf::column&& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr, @@ -132,64 +134,6 @@ struct dispatch_to_arrow_device { } }; -template -int construct_decimals(cudf::column_view input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr, - ArrowArray* out) -{ - nanoarrow::UniqueArray tmp; - NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, input)); - - auto buf = detail::convert_decimals_to_decimal128(input, stream, mr); - // Synchronize stream here to ensure the decimal128 buffer is ready. - stream.synchronize(); - NANOARROW_RETURN_NOT_OK(set_buffer(std::move(buf), fixed_width_data_buffer_idx, tmp.get())); - - ArrowArrayMove(tmp.get(), out); - return NANOARROW_OK; -} - -template <> -int dispatch_to_arrow_device::operator()(cudf::column&& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr, - ArrowArray* out) -{ - using DeviceType = int32_t; - NANOARROW_RETURN_NOT_OK(construct_decimals(column.view(), stream, mr, out)); - auto contents = column.release(); - NANOARROW_RETURN_NOT_OK(set_null_mask(contents, out)); - return NANOARROW_OK; -} - -template <> -int dispatch_to_arrow_device::operator()(cudf::column&& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr, - ArrowArray* out) -{ - using DeviceType = int64_t; - NANOARROW_RETURN_NOT_OK(construct_decimals(column.view(), stream, mr, out)); - auto contents = column.release(); - NANOARROW_RETURN_NOT_OK(set_null_mask(contents, out)); - return NANOARROW_OK; -} - -template <> -int dispatch_to_arrow_device::operator()(cudf::column&& column, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr, - ArrowArray* out) -{ - nanoarrow::UniqueArray tmp; - NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, column)); - auto contents = column.release(); - NANOARROW_RETURN_NOT_OK(set_contents(contents, tmp.get())); - ArrowArrayMove(tmp.get(), out); - return NANOARROW_OK; -} - template <> int dispatch_to_arrow_device::operator()(cudf::column&& column, rmm::cuda_stream_view stream, @@ -350,13 +294,14 @@ struct dispatch_to_arrow_device_view { rmm::cuda_stream_view stream; rmm::device_async_resource_ref mr; - template ())> + template () and not is_fixed_point())> int operator()(ArrowArray*) const { CUDF_FAIL("Unsupported type for to_arrow_device", cudf::data_type_error); } - template ())> + template () or is_fixed_point())> int operator()(ArrowArray* out) const { nanoarrow::UniqueArray tmp; @@ -404,37 +349,6 @@ struct dispatch_to_arrow_device_view { } }; -template <> -int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const -{ - using DeviceType = int32_t; - NANOARROW_RETURN_NOT_OK(construct_decimals(column, stream, mr, out)); - NANOARROW_RETURN_NOT_OK(set_null_mask(column, out)); - return NANOARROW_OK; -} - -template <> -int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const -{ - using DeviceType = int64_t; - NANOARROW_RETURN_NOT_OK(construct_decimals(column, stream, mr, out)); - NANOARROW_RETURN_NOT_OK(set_null_mask(column, out)); - return NANOARROW_OK; -} - -template <> -int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const -{ - nanoarrow::UniqueArray tmp; - - NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, column)); - NANOARROW_RETURN_NOT_OK(set_null_mask(column, tmp.get())); - NANOARROW_RETURN_NOT_OK(set_view_to_buffer(column, tmp.get())); - - ArrowArrayMove(tmp.get(), out); - return NANOARROW_OK; -} - template <> int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const { diff --git a/cpp/src/interop/to_arrow_host.cu b/cpp/src/interop/to_arrow_host.cu index 8ec0904f1ba..e93fdda0c1a 100644 --- a/cpp/src/interop/to_arrow_host.cu +++ b/cpp/src/interop/to_arrow_host.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -115,8 +115,7 @@ struct dispatch_to_arrow_host { CUDF_FAIL("Unsupported type for to_arrow_host", cudf::data_type_error); } - template () || std::is_same_v)> + template () || is_fixed_point())> int operator()(ArrowArray* out) const { nanoarrow::UniqueArray tmp; @@ -125,7 +124,7 @@ struct dispatch_to_arrow_host { NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), storage_type, column)); NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); - using DataType = std::conditional_t, __int128_t, T>; + using DataType = device_storage_type_t; NANOARROW_RETURN_NOT_OK( populate_data_buffer(device_span(column.data(), column.size()), ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); @@ -133,32 +132,6 @@ struct dispatch_to_arrow_host { ArrowArrayMove(tmp.get(), out); return NANOARROW_OK; } - - // convert decimal types from libcudf to arrow where those types are not directly - // supported by Arrow. These types must be fit into 128 bits, the smallest - // decimal resolution supported by Arrow - template () && - (std::is_same_v || - std::is_same_v))> - int operator()(ArrowArray* out) const - { - using DeviceType = std::conditional_t, int32_t, int64_t>; - nanoarrow::UniqueArray tmp; - NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, column)); - - NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); - auto buf = detail::convert_decimals_to_decimal128(column, stream, mr); - // No need to synchronize stream here as populate_data_buffer uses the same stream to copy data - // to host. - NANOARROW_RETURN_NOT_OK( - populate_data_buffer(device_span<__int128_t const>( - reinterpret_cast(buf->data()), column.size()), - ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx))); - - ArrowArrayMove(tmp.get(), out); - return NANOARROW_OK; - } }; int get_column(cudf::column_view column, diff --git a/cpp/src/interop/to_arrow_schema.cpp b/cpp/src/interop/to_arrow_schema.cpp index 5dd8d77c261..7ffee53dac8 100644 --- a/cpp/src/interop/to_arrow_schema.cpp +++ b/cpp/src/interop/to_arrow_schema.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -76,15 +76,10 @@ struct dispatch_to_arrow_type { }; template -int decimals_to_arrow(column_view input, ArrowSchema* out) +int decimals_to_arrow(column_view input, int32_t precision, ArrowSchema* out) { - // Arrow doesn't support decimal32/decimal64 currently. decimal128 - // is the smallest that arrow supports besides float32/float64 so we - // upcast to decimal128. - return ArrowSchemaSetTypeDecimal(out, - NANOARROW_TYPE_DECIMAL128, - cudf::detail::max_precision(), - -input.type().scale()); + return ArrowSchemaSetTypeDecimal( + out, id_to_arrow_type(input.type().id()), precision, -input.type().scale()); } template <> @@ -93,7 +88,7 @@ int dispatch_to_arrow_type::operator()(column_view input, ArrowSchema* out) { using DeviceType = int32_t; - return decimals_to_arrow(input, out); + return decimals_to_arrow(input, cudf::detail::max_precision(), out); } template <> @@ -102,7 +97,9 @@ int dispatch_to_arrow_type::operator()(column_view input, ArrowSchema* out) { using DeviceType = int64_t; - return decimals_to_arrow(input, out); + // Arrow decimal 64 maxes at precision of 18, cudf::detail::max_precision() produces 19. + // decimal32 has precision 1 - 9, decimal64 has precision 10 - 18, decimal128 is 19 - 38 + return decimals_to_arrow(input, cudf::detail::max_precision() - 1, out); } template <> @@ -111,7 +108,7 @@ int dispatch_to_arrow_type::operator()(column_view input, ArrowSchema* out) { using DeviceType = __int128_t; - return decimals_to_arrow(input, out); + return decimals_to_arrow(input, cudf::detail::max_precision(), out); } template <> diff --git a/cpp/tests/interop/arrow_utils.hpp b/cpp/tests/interop/arrow_utils.hpp index e785845394a..6041aa14b60 100644 --- a/cpp/tests/interop/arrow_utils.hpp +++ b/cpp/tests/interop/arrow_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -238,28 +238,34 @@ std::pair, std::shared_ptr> get_table cudf::size_type length = 10000); template -[[nodiscard]] auto make_decimal128_arrow_array(std::vector const& data, - std::optional> const& validity, - int32_t scale) -> std::shared_ptr +std::enable_if_t, + std::is_same, + std::is_same>, + std::shared_ptr> +get_decimal_arrow_array(std::vector const& data, + std::optional> const& validity, + int32_t precision, + int32_t scale) { - auto constexpr BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(T); - - std::shared_ptr arr; - arrow::Decimal128Builder decimal_builder(arrow::decimal(cudf::detail::max_precision(), -scale), - arrow::default_memory_pool()); - - for (T i = 0; i < static_cast(data.size() / BIT_WIDTH_RATIO); ++i) { - if (validity.has_value() and not validity.value()[i]) { - CUDF_EXPECTS(decimal_builder.AppendNull().ok(), "Failed to append"); - } else { - CUDF_EXPECTS( - decimal_builder.Append(reinterpret_cast(data.data() + BIT_WIDTH_RATIO * i)) - .ok(), - "Failed to append"); - } - } + std::shared_ptr data_buffer; + arrow::BufferBuilder buff_builder; + CUDF_EXPECTS(buff_builder.Append(data.data(), sizeof(T) * data.size()).ok(), + "Failed to append values to buffer builder"); + CUDF_EXPECTS(buff_builder.Finish(&data_buffer).ok(), "Failed to allocate buffer"); + + std::shared_ptr mask_buffer = + !validity.has_value() ? nullptr : arrow::internal::BytesToBits(validity.value()).ValueOrDie(); - CUDF_EXPECTS(decimal_builder.Finish(&arr).ok(), "Failed to build array"); + std::shared_ptr data_type; + if constexpr (std::is_same_v) { + data_type = arrow::decimal32(precision, -scale); + } else if constexpr (std::is_same_v) { + data_type = arrow::decimal64(precision, -scale); + } else { + data_type = arrow::decimal128(precision, -scale); + } - return arr; + auto array_data = std::make_shared( + data_type, data.size(), std::vector>{mask_buffer, data_buffer}); + return arrow::MakeArray(array_data); } diff --git a/cpp/tests/interop/from_arrow_device_test.cpp b/cpp/tests/interop/from_arrow_device_test.cpp index 1ddc33e749a..3f332c9f46b 100644 --- a/cpp/tests/interop/from_arrow_device_test.cpp +++ b/cpp/tests/interop/from_arrow_device_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,12 @@ struct FromArrowDeviceTest : public cudf::test::BaseFixture {}; template struct FromArrowDeviceTestDurationsTest : public cudf::test::BaseFixture {}; +template +struct FromArrowDeviceTestDecimalsTest : public cudf::test::BaseFixture {}; + TYPED_TEST_SUITE(FromArrowDeviceTestDurationsTest, cudf::test::DurationTypes); +using FixedPointTypes = cudf::test::Types; +TYPED_TEST_SUITE(FromArrowDeviceTestDecimalsTest, FixedPointTypes); TEST_F(FromArrowDeviceTest, FailConditions) { @@ -568,23 +573,24 @@ INSTANTIATE_TEST_CASE_P(FromArrowDeviceTest, template using fp_wrapper = cudf::test::fixed_point_column_wrapper; -TEST_F(FromArrowDeviceTest, FixedPoint128Table) +TYPED_TEST(FromArrowDeviceTestDecimalsTest, FixedPointTable) { + using T = TypeParam; using namespace numeric; + auto const precision = get_decimal_precision(); + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto const data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6}; - auto const col = fp_wrapper<__int128_t>(data.cbegin(), data.cend(), scale_type{scale}); + auto const data = std::vector{1, 2, 3, 4, 5, 6}; + auto const col = fp_wrapper(data.cbegin(), data.cend(), scale_type{scale}); auto const expected = cudf::table_view({col}); nanoarrow::UniqueSchema input_schema; ArrowSchemaInit(input_schema.get()); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1)); ArrowSchemaInit(input_schema->children[0]); - NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(input_schema->children[0], - NANOARROW_TYPE_DECIMAL128, - cudf::detail::max_precision<__int128_t>(), - -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal( + input_schema->children[0], nanoarrow_decimal_type::type, precision, -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a")); nanoarrow::UniqueArray input_array; @@ -592,7 +598,7 @@ TEST_F(FromArrowDeviceTest, FixedPoint128Table) ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr)); input_array->length = expected.num_rows(); - populate_from_col<__int128_t>(input_array->children[0], expected.column(0)); + populate_from_col(input_array->children[0], expected.column(0)); NANOARROW_THROW_NOT_OK( ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr)); @@ -613,25 +619,26 @@ TEST_F(FromArrowDeviceTest, FixedPoint128Table) } } -TEST_F(FromArrowDeviceTest, FixedPoint128TableLarge) +TYPED_TEST(FromArrowDeviceTestDecimalsTest, FixedPointTableLarge) { + using T = TypeParam; using namespace numeric; + + auto const precision = get_decimal_precision(); auto constexpr NUM_ELEMENTS = 1000; for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { auto iota = thrust::make_counting_iterator(1); - auto const data = std::vector<__int128_t>(iota, iota + NUM_ELEMENTS); - auto const col = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, scale_type{scale}); + auto const data = std::vector(iota, iota + NUM_ELEMENTS); + auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, scale_type{scale}); auto const expected = cudf::table_view({col}); nanoarrow::UniqueSchema input_schema; ArrowSchemaInit(input_schema.get()); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1)); ArrowSchemaInit(input_schema->children[0]); - NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(input_schema->children[0], - NANOARROW_TYPE_DECIMAL128, - cudf::detail::max_precision<__int128_t>(), - -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal( + input_schema->children[0], nanoarrow_decimal_type::type, precision, -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a")); nanoarrow::UniqueArray input_array; @@ -639,7 +646,7 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableLarge) ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr)); input_array->length = expected.num_rows(); - populate_from_col<__int128_t>(input_array->children[0], expected.column(0)); + populate_from_col(input_array->children[0], expected.column(0)); NANOARROW_THROW_NOT_OK( ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr)); @@ -660,25 +667,26 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableLarge) } } -TEST_F(FromArrowDeviceTest, FixedPoint128TableNulls) +TYPED_TEST(FromArrowDeviceTestDecimalsTest, FixedPointTableNulls) { + using T = TypeParam; using namespace numeric; + auto const precision = get_decimal_precision(); + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto const data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0}; + auto const data = std::vector{1, 2, 3, 4, 5, 6, 0, 0}; auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; auto const col = - fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); + fp_wrapper({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); auto const expected = cudf::table_view({col}); nanoarrow::UniqueSchema input_schema; ArrowSchemaInit(input_schema.get()); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1)); ArrowSchemaInit(input_schema->children[0]); - NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(input_schema->children[0], - NANOARROW_TYPE_DECIMAL128, - cudf::detail::max_precision<__int128_t>(), - -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal( + input_schema->children[0], nanoarrow_decimal_type::type, precision, -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a")); nanoarrow::UniqueArray input_array; @@ -686,7 +694,7 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableNulls) ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr)); input_array->length = expected.num_rows(); - populate_from_col<__int128_t>(input_array->children[0], expected.column(0)); + populate_from_col(input_array->children[0], expected.column(0)); NANOARROW_THROW_NOT_OK( ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr)); @@ -707,27 +715,28 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableNulls) } } -TEST_F(FromArrowDeviceTest, FixedPoint128TableNullsLarge) +TYPED_TEST(FromArrowDeviceTestDecimalsTest, FixedPointTableNullsLarge) { + using T = TypeParam; using namespace numeric; + + auto const precision = get_decimal_precision(); auto constexpr NUM_ELEMENTS = 1000; for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto every_other = [](auto i) { return i % 2 ? 0 : 1; }; - auto validity = cudf::detail::make_counting_transform_iterator(0, every_other); - auto iota = thrust::make_counting_iterator(1); - auto const data = std::vector<__int128_t>(iota, iota + NUM_ELEMENTS); - auto const col = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, validity, scale_type{scale}); + auto every_other = [](auto i) { return i % 2 ? 0 : 1; }; + auto validity = cudf::detail::make_counting_transform_iterator(0, every_other); + auto iota = thrust::make_counting_iterator(1); + auto const data = std::vector(iota, iota + NUM_ELEMENTS); + auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, validity, scale_type{scale}); auto const expected = cudf::table_view({col}); nanoarrow::UniqueSchema input_schema; ArrowSchemaInit(input_schema.get()); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1)); ArrowSchemaInit(input_schema->children[0]); - NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(input_schema->children[0], - NANOARROW_TYPE_DECIMAL128, - cudf::detail::max_precision<__int128_t>(), - -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal( + input_schema->children[0], nanoarrow_decimal_type::type, precision, -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a")); nanoarrow::UniqueArray input_array; @@ -735,7 +744,7 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableNullsLarge) ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr)); input_array->length = expected.num_rows(); - populate_from_col<__int128_t>(input_array->children[0], expected.column(0)); + populate_from_col(input_array->children[0], expected.column(0)); NANOARROW_THROW_NOT_OK( ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr)); diff --git a/cpp/tests/interop/from_arrow_host_test.cpp b/cpp/tests/interop/from_arrow_host_test.cpp index 1ab11b374b6..72c673eff11 100644 --- a/cpp/tests/interop/from_arrow_host_test.cpp +++ b/cpp/tests/interop/from_arrow_host_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -95,7 +96,12 @@ struct FromArrowHostDeviceTest : public cudf::test::BaseFixture {}; template struct FromArrowHostDeviceTestDurationsTest : public cudf::test::BaseFixture {}; +template +struct FromArrowHostDeviceTestDecimalsTest : public cudf::test::BaseFixture {}; + TYPED_TEST_SUITE(FromArrowHostDeviceTestDurationsTest, cudf::test::DurationTypes); +using FixedPointTypes = cudf::test::Types; +TYPED_TEST_SUITE(FromArrowHostDeviceTestDecimalsTest, FixedPointTypes); TEST_F(FromArrowHostDeviceTest, EmptyTable) { @@ -215,6 +221,220 @@ TYPED_TEST(FromArrowHostDeviceTestDurationsTest, DurationTable) CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct); } +template +using fp_wrapper = cudf::test::fixed_point_column_wrapper; + +TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTable) +{ + using T = TypeParam; + using namespace numeric; + + auto const precision = get_decimal_precision(); + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const data = std::vector{1, 2, 3, 4, 5, 6}; + auto const col = fp_wrapper(data.cbegin(), data.cend(), scale_type{scale}); + auto const expected = cudf::table_view({col}); + + nanoarrow::UniqueSchema input_schema; + ArrowSchemaInit(input_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1)); + ArrowSchemaInit(input_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal( + input_schema->children[0], nanoarrow_decimal_type::type, precision, -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a")); + + nanoarrow::UniqueArray input_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr)); + input_array->length = expected.num_rows(); + input_array->null_count = 0; + + auto arr = get_nanoarrow_array(data); + arr.move(input_array->children[0]); + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr)); + + ArrowDeviceArray input; + memcpy(&input.array, input_array.get(), sizeof(ArrowArray)); + input.device_id = -1; + input.device_type = ARROW_DEVICE_CPU; + + // converting arrow host memory to cudf table gives us the expected table + auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + + // converting to a cudf table with a single struct column gives us the expected + // result column + auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input); + EXPECT_EQ(got_cudf_col->type(), cudf::data_type{cudf::type_id::STRUCT}); + auto got_cudf_col_view = got_cudf_col->view(); + cudf::table_view from_struct{std::vector(got_cudf_col_view.child_begin(), + got_cudf_col_view.child_end())}; + CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct); + } +} + +TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTableLarge) +{ + using T = TypeParam; + using namespace numeric; + + auto const precision = get_decimal_precision(); + auto constexpr NUM_ELEMENTS = 1000; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto iota = thrust::make_counting_iterator(1); + auto const data = std::vector(iota, iota + NUM_ELEMENTS); + auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, scale_type{scale}); + auto const expected = cudf::table_view({col}); + + nanoarrow::UniqueSchema input_schema; + ArrowSchemaInit(input_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1)); + ArrowSchemaInit(input_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal( + input_schema->children[0], nanoarrow_decimal_type::type, precision, -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a")); + + nanoarrow::UniqueArray input_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr)); + input_array->length = expected.num_rows(); + input_array->null_count = 0; + + auto arr = get_nanoarrow_array(data); + arr.move(input_array->children[0]); + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr)); + + ArrowDeviceArray input; + memcpy(&input.array, input_array.get(), sizeof(ArrowArray)); + input.device_id = -1; + input.device_type = ARROW_DEVICE_CPU; + + // converting arrow host memory to cudf table gives us the expected table + auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + + // converting to a cudf table with a single struct column gives us the expected + // result column + auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input); + EXPECT_EQ(got_cudf_col->type(), cudf::data_type{cudf::type_id::STRUCT}); + auto got_cudf_col_view = got_cudf_col->view(); + cudf::table_view from_struct{std::vector(got_cudf_col_view.child_begin(), + got_cudf_col_view.child_end())}; + CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct); + } +} + +TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTableNulls) +{ + using T = TypeParam; + using namespace numeric; + + auto const precision = get_decimal_precision(); + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto const data = std::vector{1, 2, 3, 4, 5, 6}; + auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; + auto const col = fp_wrapper({1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); + auto const expected = cudf::table_view({col}); + + nanoarrow::UniqueSchema input_schema; + ArrowSchemaInit(input_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1)); + ArrowSchemaInit(input_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal( + input_schema->children[0], nanoarrow_decimal_type::type, precision, -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a")); + + nanoarrow::UniqueArray input_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr)); + input_array->length = expected.num_rows(); + + auto arr = get_nanoarrow_array(data, validity); + arr.move(input_array->children[0]); + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr)); + + ArrowDeviceArray input; + memcpy(&input.array, input_array.get(), sizeof(ArrowArray)); + input.device_id = -1; + input.device_type = ARROW_DEVICE_CPU; + + // converting arrow host memory to cudf table gives us the expected table + auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + + // converting to a cudf table with a single struct column gives us the expected + // result column + auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input); + EXPECT_EQ(got_cudf_col->type(), cudf::data_type{cudf::type_id::STRUCT}); + auto got_cudf_col_view = got_cudf_col->view(); + cudf::table_view from_struct{std::vector(got_cudf_col_view.child_begin(), + got_cudf_col_view.child_end())}; + CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct); + } +} + +TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTableLargeNulls) +{ + using T = TypeParam; + using namespace numeric; + + auto const precision = get_decimal_precision(); + auto constexpr NUM_ELEMENTS = 1000; + + for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { + auto every_other = [](auto i) { return i % 2 ? 0 : 1; }; + auto validity = cudf::detail::make_counting_transform_iterator(0, every_other); + std::vector validity_vec(validity, validity + NUM_ELEMENTS); + auto iota = thrust::make_counting_iterator(1); + auto const data = std::vector(iota, iota + NUM_ELEMENTS); + auto const col = fp_wrapper(iota, + iota + NUM_ELEMENTS, + cudf::detail::make_counting_transform_iterator(0, every_other), + scale_type{scale}); + auto const expected = cudf::table_view({col}); + + nanoarrow::UniqueSchema input_schema; + ArrowSchemaInit(input_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1)); + ArrowSchemaInit(input_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal( + input_schema->children[0], nanoarrow_decimal_type::type, precision, -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a")); + + nanoarrow::UniqueArray input_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr)); + input_array->length = expected.num_rows(); + + auto arr = get_nanoarrow_array(data, validity_vec); + arr.move(input_array->children[0]); + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr)); + + ArrowDeviceArray input; + memcpy(&input.array, input_array.get(), sizeof(ArrowArray)); + input.device_id = -1; + input.device_type = ARROW_DEVICE_CPU; + + // converting arrow host memory to cudf table gives us the expected table + auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + + // converting to a cudf table with a single struct column gives us the expected + // result column + auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input); + EXPECT_EQ(got_cudf_col->type(), cudf::data_type{cudf::type_id::STRUCT}); + auto got_cudf_col_view = got_cudf_col->view(); + cudf::table_view from_struct{std::vector(got_cudf_col_view.child_begin(), + got_cudf_col_view.child_end())}; + CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct); + } +} + TEST_F(FromArrowHostDeviceTest, NestedList) { auto valids = diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 62e38cbfd45..31a3230e084 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -86,6 +87,9 @@ struct FromArrowTest : public cudf::test::BaseFixture {}; template struct FromArrowTestDurationsTest : public cudf::test::BaseFixture {}; +template +struct FromArrowTestDecimalsTest : public cudf::test::BaseFixture {}; + std::optional> export_table(std::shared_ptr arrow_table) { ArrowSchema schema; @@ -126,6 +130,8 @@ std::optional> export_scalar( } TYPED_TEST_SUITE(FromArrowTestDurationsTest, cudf::test::DurationTypes); +using FixedPointTypes = cudf::test::Types; +TYPED_TEST_SUITE(FromArrowTestDecimalsTest, FixedPointTypes); TEST_F(FromArrowTest, EmptyTable) { @@ -210,7 +216,7 @@ TEST_F(FromArrowTest, NestedList) auto list_arr = get_arrow_list_array({6, 7, 8, 9}, {0, 1, 4}, {1, 0, 1, 1}); std::vector offset{0, 0, 2}; - auto mask_buffer = arrow::internal::BytesToBits({0, 1}).ValueOrDie(); + auto mask_buffer = arrow::internal::BytesToBits(std::vector({0, 1})).ValueOrDie(); auto nested_list_arr = std::make_shared(arrow::list(list(arrow::int64())), offset.size() - 1, arrow::Buffer::Wrap(offset), @@ -288,9 +294,10 @@ TEST_F(FromArrowTest, StructColumn) auto fields2 = std::vector>{ std::make_shared("string2", str2_array->type(), str2_array->null_count() > 0), std::make_shared("integral2", int2_array->type(), int2_array->null_count() > 0)}; - std::shared_ptr mask_buffer = arrow::internal::BytesToBits({1, 1, 0}).ValueOrDie(); - auto dtype2 = std::make_shared(fields2); - auto struct_array2 = std::make_shared( + std::shared_ptr mask_buffer = + arrow::internal::BytesToBits(std::vector({1, 1, 0})).ValueOrDie(); + auto dtype2 = std::make_shared(fields2); + auto struct_array2 = std::make_shared( dtype2, static_cast(expected_cudf_table.num_rows()), child_arrays2, mask_buffer); std::vector> child_arrays( @@ -444,16 +451,18 @@ TEST_P(FromArrowTestSlice, SliceTest) template using fp_wrapper = cudf::test::fixed_point_column_wrapper; -TEST_F(FromArrowTest, FixedPoint128Table) +TYPED_TEST(FromArrowTestDecimalsTest, FixedPointTable) { + using T = TypeParam; using namespace numeric; + auto const precision = get_decimal_precision(); for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto const data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6}; - auto const col = fp_wrapper<__int128_t>(data.cbegin(), data.cend(), scale_type{scale}); + auto const data = std::vector{1, 2, 3, 4, 5, 6}; + auto const col = fp_wrapper(data.cbegin(), data.cend(), scale_type{scale}); auto const expected = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, std::nullopt, scale); + auto const arr = get_decimal_arrow_array(data, std::nullopt, precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -467,18 +476,21 @@ TEST_F(FromArrowTest, FixedPoint128Table) } } -TEST_F(FromArrowTest, FixedPoint128TableLarge) +TYPED_TEST(FromArrowTestDecimalsTest, FixedPointTableLarge) { + using T = TypeParam; using namespace numeric; + + auto const precision = get_decimal_precision(); auto constexpr NUM_ELEMENTS = 1000; for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { auto iota = thrust::make_counting_iterator(1); - auto const data = std::vector<__int128_t>(iota, iota + NUM_ELEMENTS); - auto const col = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, scale_type{scale}); + auto const data = std::vector(iota, iota + NUM_ELEMENTS); + auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, scale_type{scale}); auto const expected = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, std::nullopt, scale); + auto const arr = get_decimal_arrow_array(data, std::nullopt, precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -492,19 +504,21 @@ TEST_F(FromArrowTest, FixedPoint128TableLarge) } } -TEST_F(FromArrowTest, FixedPoint128TableNulls) +TYPED_TEST(FromArrowTestDecimalsTest, FixedPointTableNulls) { + using T = TypeParam; using namespace numeric; + auto const precision = get_decimal_precision(); for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto const data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0}; - auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; - auto const col = fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, - {true, true, true, true, true, true, false, false}, - scale_type{scale}); + auto const data = std::vector{1, 2, 3, 4, 5, 6, 0, 0}; + auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; + auto const col = fp_wrapper({1, 2, 3, 4, 5, 6, 0, 0}, + {true, true, true, true, true, true, false, false}, + scale_type{scale}); auto const expected = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, validity, scale); + auto const arr = get_decimal_arrow_array(data, validity, precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -518,21 +532,24 @@ TEST_F(FromArrowTest, FixedPoint128TableNulls) } } -TEST_F(FromArrowTest, FixedPoint128TableNullsLarge) +TYPED_TEST(FromArrowTestDecimalsTest, FixedPointTableNullsLarge) { + using T = TypeParam; using namespace numeric; + + auto const precision = get_decimal_precision(); auto constexpr NUM_ELEMENTS = 1000; for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto every_other = [](auto i) { return i % 2 ? 0 : 1; }; - auto validity = cudf::detail::make_counting_transform_iterator(0, every_other); - auto iota = thrust::make_counting_iterator(1); - auto const data = std::vector<__int128_t>(iota, iota + NUM_ELEMENTS); - auto const col = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, validity, scale_type{scale}); + auto every_other = [](auto i) { return i % 2 ? 0 : 1; }; + auto validity = cudf::detail::make_counting_transform_iterator(0, every_other); + auto iota = thrust::make_counting_iterator(1); + auto const data = std::vector(iota, iota + NUM_ELEMENTS); + auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, validity, scale_type{scale}); auto const expected = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array( - data, std::vector(validity, validity + NUM_ELEMENTS), scale); + auto const arr = get_decimal_arrow_array( + data, std::vector(validity, validity + NUM_ELEMENTS), precision, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -579,24 +596,33 @@ TYPED_TEST(FromArrowNumericScalarTest, Basic) struct FromArrowDecimalScalarTest : public cudf::test::BaseFixture {}; -// Only testing Decimal128 because that's the only size cudf and arrow have in common. -TEST_F(FromArrowDecimalScalarTest, Basic) +template +void check_decimal_scalar(const int value, ScalarType const& arrow_scalar) { - auto const value{42}; - auto const precision{8}; auto const scale{4}; - auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale)); auto const cudf_scalar = export_scalar(arrow_scalar); ASSERT_TRUE(cudf_scalar.has_value()); - // Arrow offers a minimum of 128 bits for the Decimal type. auto const cudf_decimal_scalar = - dynamic_cast*>(cudf_scalar.value().get()); - EXPECT_EQ(cudf_decimal_scalar->type(), - cudf::data_type(cudf::type_to_id(), scale)); + dynamic_cast*>(cudf_scalar.value().get()); + EXPECT_EQ(cudf_decimal_scalar->type(), cudf::data_type(cudf::type_to_id(), scale)); EXPECT_EQ(cudf_decimal_scalar->value(), value); } +TEST_F(FromArrowDecimalScalarTest, Basic) +{ + auto const value{42}; + auto const precision{8}; + auto const scale{4}; + auto arrow_scalar32 = arrow::Decimal32Scalar(value, arrow::decimal32(precision, -scale)); + auto arrow_scalar64 = arrow::Decimal64Scalar(value, arrow::decimal64(precision, -scale)); + auto arrow_scalar128 = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale)); + + check_decimal_scalar(value, arrow_scalar32); + check_decimal_scalar(value, arrow_scalar64); + check_decimal_scalar(value, arrow_scalar128); +} + struct FromArrowStringScalarTest : public cudf::test::BaseFixture {}; TEST_F(FromArrowStringScalarTest, Basic) diff --git a/cpp/tests/interop/nanoarrow_utils.hpp b/cpp/tests/interop/nanoarrow_utils.hpp index b7b8202a3c2..a1211a16e10 100644 --- a/cpp/tests/interop/nanoarrow_utils.hpp +++ b/cpp/tests/interop/nanoarrow_utils.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -217,6 +218,24 @@ DEFINE_NANOARROW_STORAGE(__int128_t, DECIMAL128); #undef DEFINE_NANOARROW_STORAGE +template +struct nanoarrow_decimal_type {}; + +template <> +struct nanoarrow_decimal_type { + static constexpr ArrowType type = NANOARROW_TYPE_DECIMAL32; +}; + +template <> +struct nanoarrow_decimal_type { + static constexpr ArrowType type = NANOARROW_TYPE_DECIMAL64; +}; + +template <> +struct nanoarrow_decimal_type<__int128_t> { + static constexpr ArrowType type = NANOARROW_TYPE_DECIMAL128; +}; + template std::enable_if_t() and !std::is_same_v, nanoarrow::UniqueArray> get_nanoarrow_array(std::vector const& data, std::vector const& mask = {}) @@ -384,3 +403,16 @@ std::tuple, nanoarrow::UniqueSchema, nanoarrow::Uni get_nanoarrow_host_tables(cudf::size_type length); void slice_host_nanoarrow(ArrowArray* arr, int64_t start, int64_t end); + +template +std::enable_if_t, + std::is_same, + std::is_same>, + std::size_t> +get_decimal_precision() +{ + if constexpr (std::is_same_v) + return 18; + else + return cudf::detail::max_precision(); +} diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp index 112b3e1d8e2..7fcb1478196 100644 --- a/cpp/tests/interop/to_arrow_device_test.cpp +++ b/cpp/tests/interop/to_arrow_device_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -708,9 +708,7 @@ TEST_F(ToArrowDeviceTest, FixedPoint32Table) using namespace numeric; for (auto const scale : {6, 4, 2, 0, -1, -3, -5}) { - auto const expect_data = - std::vector{-1000, -1, -1, -1, 2400, 0, 0, 0, -3456, -1, -1, -1, - 4650, 0, 0, 0, 5154, 0, 0, 0, 6800, 0, 0, 0}; + auto const expect_data = std::vector{-1000, 2400, -3456, 4650, 5154, 6800}; auto col = fp_wrapper({-1000, 2400, -3456, 4650, 5154, 6800}, scale_type{scale}); std::vector> table_cols; table_cols.emplace_back(col.release()); @@ -721,7 +719,7 @@ TEST_F(ToArrowDeviceTest, FixedPoint32Table) NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); ArrowSchemaInit(expected_schema->children[0]); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], - NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL32, cudf::detail::max_precision(), -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); @@ -731,36 +729,12 @@ TEST_F(ToArrowDeviceTest, FixedPoint32Table) cudf::to_arrow_schema(input.view(), std::vector{{"a"}}); compare_schemas(expected_schema.get(), got_arrow_schema.get()); - auto result_dev_data = std::make_unique>( - expect_data.size(), cudf::get_default_stream()); - cudaMemcpy(result_dev_data->data(), - expect_data.data(), - sizeof(int32_t) * expect_data.size(), - cudaMemcpyHostToDevice); - - cudf::get_default_stream().synchronize(); nanoarrow::UniqueArray expected_array; NANOARROW_THROW_NOT_OK( ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); expected_array->length = input.num_rows(); - expected_array->children[0]->length = input.num_rows(); - NANOARROW_THROW_NOT_OK( - ArrowBufferSetAllocator(ArrowArrayBuffer(expected_array->children[0], 0), noop_alloc)); - ArrowArrayValidityBitmap(expected_array->children[0])->buffer.data = - const_cast(reinterpret_cast(input.view().column(0).null_mask())); - - auto data_ptr = reinterpret_cast(result_dev_data->data()); - NANOARROW_THROW_NOT_OK(ArrowBufferSetAllocator( - ArrowArrayBuffer(expected_array->children[0], 1), - ArrowBufferDeallocator( - [](ArrowBufferAllocator* alloc, uint8_t*, int64_t) { - auto buf = - reinterpret_cast>*>(alloc->private_data); - delete buf; - }, - new std::unique_ptr>(std::move(result_dev_data))))); - ArrowArrayBuffer(expected_array->children[0], 1)->data = data_ptr; + populate_from_col(expected_array->children[0], input.view().column(0)); NANOARROW_THROW_NOT_OK( ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr)); @@ -795,10 +769,8 @@ TEST_F(ToArrowDeviceTest, FixedPoint64Table) ArrowSchemaInit(expected_schema.get()); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); ArrowSchemaInit(expected_schema->children[0]); - NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], - NANOARROW_TYPE_DECIMAL128, - cudf::detail::max_precision(), - -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal( + expected_schema->children[0], NANOARROW_TYPE_DECIMAL64, 18, -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); expected_schema->children[0]->flags = 0; @@ -806,36 +778,12 @@ TEST_F(ToArrowDeviceTest, FixedPoint64Table) cudf::to_arrow_schema(input.view(), std::vector{{"a"}}); compare_schemas(expected_schema.get(), got_arrow_schema.get()); - auto result_dev_data = std::make_unique>( - expect_data.size(), cudf::get_default_stream()); - cudaMemcpy(result_dev_data->data(), - expect_data.data(), - sizeof(int64_t) * expect_data.size(), - cudaMemcpyHostToDevice); - - cudf::get_default_stream().synchronize(); nanoarrow::UniqueArray expected_array; NANOARROW_THROW_NOT_OK( ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); expected_array->length = input.num_rows(); - expected_array->children[0]->length = input.num_rows(); - NANOARROW_THROW_NOT_OK( - ArrowBufferSetAllocator(ArrowArrayBuffer(expected_array->children[0], 0), noop_alloc)); - ArrowArrayValidityBitmap(expected_array->children[0])->buffer.data = - const_cast(reinterpret_cast(input.view().column(0).null_mask())); - - auto data_ptr = reinterpret_cast(result_dev_data->data()); - NANOARROW_THROW_NOT_OK(ArrowBufferSetAllocator( - ArrowArrayBuffer(expected_array->children[0], 1), - ArrowBufferDeallocator( - [](ArrowBufferAllocator* alloc, uint8_t*, int64_t) { - auto buf = - reinterpret_cast>*>(alloc->private_data); - delete buf; - }, - new std::unique_ptr>(std::move(result_dev_data))))); - ArrowArrayBuffer(expected_array->children[0], 1)->data = data_ptr; + populate_from_col(expected_array->children[0], input.view().column(0)); NANOARROW_THROW_NOT_OK( ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr)); diff --git a/cpp/tests/interop/to_arrow_host_test.cpp b/cpp/tests/interop/to_arrow_host_test.cpp index fa3aa82fee2..44e06f56ab6 100644 --- a/cpp/tests/interop/to_arrow_host_test.cpp +++ b/cpp/tests/interop/to_arrow_host_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -586,13 +586,13 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32Table) auto const col = fp_wrapper({-1, 2, 3, 4, 5, 6}, scale_type{scale}); auto const input = cudf::table_view({col}); - auto const data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6}; + auto const data = std::vector{-1, 2, 3, 4, 5, 6}; nanoarrow::UniqueSchema expected_schema; ArrowSchemaInit(expected_schema.get()); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); ArrowSchemaInit(expected_schema->children[0]); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], - NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL32, cudf::detail::max_precision(), -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); @@ -603,7 +603,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32Table) ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); expected_array->length = input.num_rows(); - get_nanoarrow_array<__int128_t>(data).move(expected_array->children[0]); + get_nanoarrow_array(data).move(expected_array->children[0]); NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); auto got_arrow_host = cudf::to_arrow_host(input); @@ -639,13 +639,13 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64Table) auto const col = fp_wrapper({-1, 2, 3, 4, 5, 6}, scale_type{scale}); auto const input = cudf::table_view({col}); - auto const data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6}; + auto const data = std::vector{-1, 2, 3, 4, 5, 6}; nanoarrow::UniqueSchema expected_schema; ArrowSchemaInit(expected_schema.get()); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); ArrowSchemaInit(expected_schema->children[0]); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], - NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL64, cudf::detail::max_precision(), -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); @@ -656,7 +656,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64Table) ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); expected_array->length = input.num_rows(); - get_nanoarrow_array<__int128_t>(data).move(expected_array->children[0]); + get_nanoarrow_array(data).move(expected_array->children[0]); NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); auto got_arrow_host = cudf::to_arrow_host(input); @@ -748,7 +748,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableLarge) auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, scale_type{scale}); auto const input = cudf::table_view({col}); - auto expect_data = std::vector<__int128_t>(NUM_ELEMENTS); + auto expect_data = std::vector(NUM_ELEMENTS); std::iota(expect_data.begin(), expect_data.end(), 1); nanoarrow::UniqueSchema expected_schema; @@ -756,7 +756,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableLarge) NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); ArrowSchemaInit(expected_schema->children[0]); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], - NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL32, cudf::detail::max_precision(), -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); @@ -767,7 +767,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableLarge) ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); expected_array->length = input.num_rows(); - get_nanoarrow_array<__int128_t>(expect_data).move(expected_array->children[0]); + get_nanoarrow_array(expect_data).move(expected_array->children[0]); NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); auto got_arrow_host = cudf::to_arrow_host(input); @@ -805,7 +805,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableLarge) auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, scale_type{scale}); auto const input = cudf::table_view({col}); - auto expect_data = std::vector<__int128_t>(NUM_ELEMENTS); + auto expect_data = std::vector(NUM_ELEMENTS); std::iota(expect_data.begin(), expect_data.end(), 1); nanoarrow::UniqueSchema expected_schema; @@ -813,7 +813,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableLarge) NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); ArrowSchemaInit(expected_schema->children[0]); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], - NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL64, cudf::detail::max_precision(), -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); @@ -824,7 +824,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableLarge) ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); expected_array->length = input.num_rows(); - get_nanoarrow_array<__int128_t>(expect_data).move(expected_array->children[0]); + get_nanoarrow_array(expect_data).move(expected_array->children[0]); NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); auto got_arrow_host = cudf::to_arrow_host(input); @@ -914,7 +914,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableNullsSimple) using namespace numeric; for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto const data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0}; + auto const data = std::vector{1, 2, 3, 4, 5, 6, 0, 0}; auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; auto const col = fp_wrapper({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); @@ -925,7 +925,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableNullsSimple) NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); ArrowSchemaInit(expected_schema->children[0]); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], - NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL32, cudf::detail::max_precision(), -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); @@ -936,7 +936,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableNullsSimple) ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); expected_array->length = input.num_rows(); - get_nanoarrow_array<__int128_t>(data, validity).move(expected_array->children[0]); + get_nanoarrow_array(data, validity).move(expected_array->children[0]); NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); auto got_arrow_host = cudf::to_arrow_host(input); @@ -969,7 +969,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableNullsSimple) using namespace numeric; for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto const data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0}; + auto const data = std::vector{1, 2, 3, 4, 5, 6, 0, 0}; auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; auto const col = fp_wrapper({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); @@ -980,7 +980,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableNullsSimple) NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); ArrowSchemaInit(expected_schema->children[0]); NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], - NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL64, cudf::detail::max_precision(), -scale)); NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); @@ -991,7 +991,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableNullsSimple) ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); expected_array->length = input.num_rows(); - get_nanoarrow_array<__int128_t>(data, validity).move(expected_array->children[0]); + get_nanoarrow_array(data, validity).move(expected_array->children[0]); NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr)); auto got_arrow_host = cudf::to_arrow_host(input); diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 28a80502f08..393dc098c9c 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -258,7 +258,7 @@ TEST_F(ToArrowTest, NestedList) auto list_arr = get_arrow_list_array({6, 7, 8, 9}, {0, 1, 4}, {1, 0, 1, 1}); std::vector offset{0, 0, 2}; - auto mask_buffer = arrow::internal::BytesToBits({0, 1}).ValueOrDie(); + auto mask_buffer = arrow::internal::BytesToBits(std::vector({0, 1})).ValueOrDie(); auto nested_list_arr = std::make_shared( arrow::list(arrow::field("element", arrow::list(arrow::int64()), false)), offset.size() - 1, @@ -337,9 +337,10 @@ TEST_F(ToArrowTest, StructColumn) auto fields2 = std::vector>{ std::make_shared("string2", str2_array->type(), str2_array->null_count() > 0), std::make_shared("integral2", int2_array->type(), int2_array->null_count() > 0)}; - auto dtype2 = std::make_shared(fields2); - std::shared_ptr mask_buffer = arrow::internal::BytesToBits({1, 1, 0}).ValueOrDie(); - auto struct_array2 = std::make_shared( + auto dtype2 = std::make_shared(fields2); + std::shared_ptr mask_buffer = + arrow::internal::BytesToBits(std::vector({1, 1, 0})).ValueOrDie(); + auto struct_array2 = std::make_shared( dtype2, static_cast(input_view.num_rows()), child_arrays2, mask_buffer); std::vector> child_arrays( @@ -377,9 +378,9 @@ TEST_F(ToArrowTest, FixedPoint64Table) for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { auto const col = fp_wrapper({-1, 2, 3, 4, 5, 6}, scale_type{scale}); auto const input = cudf::table_view({col}); - auto const expect_data = std::vector{-1, -1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0}; + auto const expect_data = std::vector{-1, 2, 3, 4, 5, 6}; - auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale); + auto const arr = get_decimal_arrow_array(expect_data, std::nullopt, 18, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -400,7 +401,7 @@ TEST_F(ToArrowTest, FixedPoint128Table) auto const input = cudf::table_view({col}); auto const expect_data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6}; - auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale); + auto const arr = get_decimal_arrow_array(expect_data, std::nullopt, 38, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -415,20 +416,15 @@ TEST_F(ToArrowTest, FixedPoint128Table) TEST_F(ToArrowTest, FixedPoint64TableLarge) { using namespace numeric; - auto constexpr BIT_WIDTH_RATIO = 2; // Array::Type:type::DECIMAL (128) / int64_t - auto constexpr NUM_ELEMENTS = 1000; + auto constexpr NUM_ELEMENTS = 1000; for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto const iota = thrust::make_counting_iterator(1); - auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, scale_type{scale}); - auto const input = cudf::table_view({col}); - - auto const every_other = [](auto i) { return i % 2 == 0 ? i / 2 : 0; }; - auto const transform = cudf::detail::make_counting_transform_iterator(2, every_other); - auto const expect_data = - std::vector{transform, transform + NUM_ELEMENTS * BIT_WIDTH_RATIO}; + auto const iota = thrust::make_counting_iterator(1); + auto const col = fp_wrapper(iota, iota + NUM_ELEMENTS, scale_type{scale}); + auto const input = cudf::table_view({col}); + auto const expect_data = std::vector{iota, iota + NUM_ELEMENTS}; - auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale); + auto const arr = get_decimal_arrow_array(expect_data, std::nullopt, 18, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -451,7 +447,7 @@ TEST_F(ToArrowTest, FixedPoint128TableLarge) auto const input = cudf::table_view({col}); auto const expect_data = std::vector<__int128_t>{iota, iota + NUM_ELEMENTS}; - auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale); + auto const arr = get_decimal_arrow_array(expect_data, std::nullopt, 38, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -468,13 +464,13 @@ TEST_F(ToArrowTest, FixedPoint64TableNullsSimple) using namespace numeric; for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { - auto const data = std::vector{1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 0, 0, 0, 0}; - auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; + auto const data = std::vector{1, 2, 3, 4, 5, 6, 0, 0}; + auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; auto const col = fp_wrapper({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); auto const input = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, validity, scale); + auto const arr = get_decimal_arrow_array(data, validity, 18, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -492,12 +488,12 @@ TEST_F(ToArrowTest, FixedPoint128TableNullsSimple) for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) { auto const data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0}; - auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; + auto const validity = std::vector{1, 1, 1, 1, 1, 1, 0, 0}; auto const col = fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale}); auto const input = cudf::table_view({col}); - auto const arr = make_decimal128_arrow_array(data, validity, scale); + auto const arr = get_decimal_arrow_array(data, validity, 38, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -518,11 +514,10 @@ TEST_F(ToArrowTest, FixedPoint64TableNulls) {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}, scale_type{scale}); auto const input = cudf::table_view({col}); - auto const expect_data = - std::vector{1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0, 9, 0, 10, 0}; - auto const validity = std::vector{1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + auto const expect_data = std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + auto const validity = std::vector{1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - auto arr = make_decimal128_arrow_array(expect_data, validity, scale); + auto const arr = get_decimal_arrow_array(expect_data, validity, 18, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -544,9 +539,9 @@ TEST_F(ToArrowTest, FixedPoint128TableNulls) auto const input = cudf::table_view({col}); auto const expect_data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - auto const validity = std::vector{1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; + auto const validity = std::vector{1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - auto arr = make_decimal128_arrow_array(expect_data, validity, scale); + auto const arr = get_decimal_arrow_array(expect_data, validity, 38, scale); auto const field = arrow::field("a", arr->type()); auto const schema_vector = std::vector>({field}); @@ -611,7 +606,9 @@ auto col_to_arrow_type(cudf::column_view const& col) case cudf::type_id::STRING: return arrow::utf8(); case cudf::type_id::LIST: return arrow::list(col_to_arrow_type(col.child(cudf::lists_column_view::child_column_index))); - case cudf::type_id::DECIMAL128: return arrow::decimal(38, -col.type().scale()); + case cudf::type_id::DECIMAL32: return arrow::decimal32(9, -col.type().scale()); + case cudf::type_id::DECIMAL64: return arrow::decimal64(18, -col.type().scale()); + case cudf::type_id::DECIMAL128: return arrow::decimal128(38, -col.type().scale()); default: CUDF_FAIL("Unsupported type_id conversion to arrow type", cudf::data_type_error); } } @@ -652,27 +649,36 @@ TYPED_TEST(ToArrowNumericScalarTest, Basic) struct ToArrowDecimalScalarTest : public cudf::test::BaseFixture {}; -// Only testing Decimal128 because that's the only size cudf and arrow have in common. -TEST_F(ToArrowDecimalScalarTest, Basic) +template +void check_decimal_scalar(int const value, arrow::Scalar const& ref_scalar, int32_t const scale) { - auto const value{42}; - auto const precision = - cudf::detail::max_precision<__int128_t>(); // cudf will convert to the widest-precision Arrow - // scalar of the type - int32_t const scale{4}; - auto const cudf_scalar = - cudf::make_fixed_point_scalar(value, numeric::scale_type{scale}); + cudf::make_fixed_point_scalar(value, numeric::scale_type{scale}); auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); ASSERT_TRUE(maybe_scalar.has_value()); auto const arrow_scalar = *maybe_scalar; + EXPECT_TRUE(arrow_scalar->Equals(ref_scalar)); +} - auto const maybe_ref_arrow_scalar = - arrow::MakeScalar(arrow::decimal128(precision, -scale), value); - if (!maybe_ref_arrow_scalar.ok()) { CUDF_FAIL("Failed to construct reference scalar"); } - auto const ref_arrow_scalar = *maybe_ref_arrow_scalar; - EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); +TEST_F(ToArrowDecimalScalarTest, Basic) +{ + auto const value{42}; + int32_t const scale{4}; + + auto const get_ref_scalar = [&](std::shared_ptr type) { + auto const maybe_ref_scalar = arrow::MakeScalar(type, value); + if (!maybe_ref_scalar.ok()) { CUDF_FAIL("Failed to construct reference scalar"); } + return *maybe_ref_scalar; + }; + + auto const decimal32_scalar = get_ref_scalar(arrow::decimal32(9, -scale)); + auto const decimal64_scalar = get_ref_scalar(arrow::decimal64(18, -scale)); + auto const decimal128_scalar = get_ref_scalar(arrow::decimal128(38, -scale)); + + check_decimal_scalar(value, *decimal32_scalar, scale); + check_decimal_scalar(value, *decimal64_scalar, scale); + check_decimal_scalar(value, *decimal128_scalar, scale); } struct ToArrowStringScalarTest : public cudf::test::BaseFixture {}; diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 7eb32892bad..642eb7f037c 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -40,6 +40,7 @@ import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import java.io.*; @@ -9462,6 +9463,9 @@ private Table castDecimal64To128(Table t) { } @Test + @Disabled("arrow-java does not yet support Decimal32/Decimal64, so now that" + + "we don't automatically upcast to decimal128 on conversion to arrow, we have" + + "to wait until it supports those types, then upgrade") void testArrowIPCWriteToFileWithNamesAndMetadata() throws IOException { File tempFile = File.createTempFile("test-names-metadata", ".arrow"); String[] columnNames = WriteUtils.getNonNestedColumns(false); @@ -9495,6 +9499,8 @@ void testArrowIPCWriteToFileWithNamesAndMetadata() throws IOException { } @Test + @Disabled("arrow-java does not yet support Decimal32/Decimal64, " + + "this can be re-enabled once it does and we upgrade") void testArrowIPCWriteToBufferChunked() { String[] nonNestedCols = WriteUtils.getNonNestedColumns(false); List columns = Lists.newArrayList(nonNestedCols); diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index e0bc8f32c9b..80ffce9e8be 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from decimal import Decimal @@ -6,6 +6,7 @@ import numpy as np import pandas as pd +import pyarrow as pa import pytest import cudf @@ -53,10 +54,34 @@ def test_sum_string(): @pytest.mark.parametrize( "dtype", [ - Decimal64Dtype(6, 3), - Decimal64Dtype(10, 6), - Decimal64Dtype(16, 7), - Decimal32Dtype(6, 3), + pytest.param( + Decimal64Dtype(6, 3), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(10, 6), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(16, 7), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal32Dtype(6, 3), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), Decimal128Dtype(20, 7), ], ) @@ -93,10 +118,34 @@ def test_product(dtype, nelem): @pytest.mark.parametrize( "dtype", [ - Decimal64Dtype(6, 2), - Decimal64Dtype(8, 4), - Decimal64Dtype(10, 5), - Decimal32Dtype(6, 2), + pytest.param( + Decimal64Dtype(6, 2), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(8, 4), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(10, 5), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal32Dtype(6, 2), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), Decimal128Dtype(20, 5), ], ) @@ -141,11 +190,35 @@ def test_sum_of_squares(dtype, nelem): @pytest.mark.parametrize( "dtype", [ - Decimal64Dtype(6, 2), - Decimal64Dtype(8, 4), - Decimal64Dtype(10, 5), + pytest.param( + Decimal64Dtype(6, 2), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(8, 4), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(10, 5), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), Decimal128Dtype(20, 7), - Decimal32Dtype(6, 2), + pytest.param( + Decimal32Dtype(6, 2), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), ], ) def test_sum_of_squares_decimal(dtype): @@ -172,10 +245,34 @@ def test_min(dtype, nelem): @pytest.mark.parametrize( "dtype", [ - Decimal64Dtype(6, 3), - Decimal64Dtype(10, 6), - Decimal64Dtype(16, 7), - Decimal32Dtype(6, 3), + pytest.param( + Decimal64Dtype(6, 3), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(10, 6), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(16, 7), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal32Dtype(6, 3), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), Decimal128Dtype(20, 7), ], ) @@ -204,10 +301,34 @@ def test_max(dtype, nelem): @pytest.mark.parametrize( "dtype", [ - Decimal64Dtype(6, 3), - Decimal64Dtype(10, 6), - Decimal64Dtype(16, 7), - Decimal32Dtype(6, 3), + pytest.param( + Decimal64Dtype(6, 3), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(10, 6), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal64Dtype(16, 7), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow >=19", + ), + ), + pytest.param( + Decimal32Dtype(6, 3), + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal32 format string only supported in pyarrow >=19", + ), + ), Decimal128Dtype(20, 7), ], ) diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py index 1e120cfb293..13a0d7b3ba1 100644 --- a/python/cudf/cudf/tests/test_scalar.py +++ b/python/cudf/cudf/tests/test_scalar.py @@ -163,7 +163,23 @@ def test_scalar_device_initialization(value): @pytest.mark.parametrize("value", DECIMAL_VALUES) @pytest.mark.parametrize( "decimal_type", - [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype], + [ + pytest.param( + cudf.Decimal32Dtype, + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal32 format string only supported in pyarrow>=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype, + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow>=19", + ), + ), + cudf.Decimal128Dtype, + ], ) def test_scalar_device_initialization_decimal(value, decimal_type): dtype = decimal_type._from_decimal(value) @@ -381,6 +397,50 @@ def test_scalar_invalid_implicit_conversion(cls, dtype): cls(slr) +@pytest.mark.parametrize("value", SCALAR_VALUES + DECIMAL_VALUES) +@pytest.mark.parametrize( + "decimal_type", + [ + pytest.param( + cudf.Decimal32Dtype, + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal32 format string only supported in pyarrow>=19", + ), + ), + pytest.param( + cudf.Decimal64Dtype, + marks=pytest.mark.skipif( + pa._generated_version.version_tuple[0] < 19, + reason="decimal64 format string only supported in pyarrow>=19", + ), + ), + cudf.Decimal128Dtype, + ], +) +def test_device_scalar_direct_construction(value, decimal_type): + value = cudf.utils.dtypes.to_cudf_compatible_scalar(value) + + dtype = ( + value.dtype + if not isinstance(value, Decimal) + else decimal_type._from_decimal(value) + ) + + s = cudf.Scalar(value, dtype) + + assert s.value == value or np.isnan(s.value) and np.isnan(value) + if isinstance( + dtype, (cudf.Decimal64Dtype, cudf.Decimal128Dtype, cudf.Decimal32Dtype) + ): + assert s.dtype.precision == dtype.precision + assert s.dtype.scale == dtype.scale + elif dtype.char == "U": + assert s.dtype == "object" + else: + assert s.dtype == dtype + + @pytest.mark.parametrize("value", SCALAR_VALUES + DECIMAL_VALUES) def test_construct_from_scalar(value): value = cudf.utils.dtypes.to_cudf_compatible_scalar(value)