From d9b7a98b7adb17dc821eafea7adefd1f99978130 Mon Sep 17 00:00:00 2001
From: Matt Topol <zotthewizard@gmail.com>
Date: Wed, 29 Jan 2025 16:59:17 -0500
Subject: [PATCH] Avoid converting Decimal32/Decimal64 in `to_arrow` and
 `from_arrow` APIs (#17422)

Now that the Arrow format includes `Decimal32` and `Decimal64` data types, cuDF no longer needs to convert them to `Decimal128` when importing/exporting values via the `to_arrow` and `from_arrow` APIs. Instead, we can treat them like any other fixed-width data type and use the buffers directly.

This doesn't fully address https://github.com/rapidsai/cudf/issues/17080, as it doesn't make any changes to the Parquet side of things.

This also incorporates the changes from https://github.com/rapidsai/cudf/pull/17405 which are needed for debug tests. That should get merged first, and then I can rebase this.

Authors:
  - Matt Topol (https://github.com/zeroshade)
  - David Wendt (https://github.com/davidwendt)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Paul Mattione (https://github.com/pmattione-nvidia)
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Robert (Bobby) Evans (https://github.com/revans2)
  - David Wendt (https://github.com/davidwendt)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/17422
---
 conda/recipes/cudf/meta.yaml                  |   2 +-
 conda/recipes/pylibcudf/meta.yaml             |   2 +-
 cpp/cmake/thirdparty/get_arrow.cmake          |   4 +-
 cpp/cmake/thirdparty/get_nanoarrow.cmake      |   6 +-
 .../patches/nanoarrow_override.json           |  18 ++
 cpp/src/interop/arrow_utilities.cpp           |   6 +-
 cpp/src/interop/arrow_utilities.hpp           |  18 +-
 cpp/src/interop/from_arrow_device.cu          |   9 +-
 cpp/src/interop/from_arrow_host.cu            |  11 +-
 cpp/src/interop/to_arrow_device.cu            | 102 +-------
 cpp/src/interop/to_arrow_host.cu              |  33 +--
 cpp/src/interop/to_arrow_schema.cpp           |  21 +-
 cpp/tests/interop/arrow_utils.hpp             |  50 ++--
 cpp/tests/interop/from_arrow_device_test.cpp  |  81 ++++---
 cpp/tests/interop/from_arrow_host_test.cpp    | 222 +++++++++++++++++-
 cpp/tests/interop/from_arrow_test.cpp         | 100 +++++---
 cpp/tests/interop/nanoarrow_utils.hpp         |  34 ++-
 cpp/tests/interop/to_arrow_device_test.cpp    |  66 +-----
 cpp/tests/interop/to_arrow_host_test.cpp      |  38 +--
 cpp/tests/interop/to_arrow_test.cpp           |  98 ++++----
 .../test/java/ai/rapids/cudf/TableTest.java   |   6 +
 python/cudf/cudf/tests/test_reductions.py     | 163 +++++++++++--
 python/cudf/cudf/tests/test_scalar.py         |  62 ++++-
 23 files changed, 735 insertions(+), 417 deletions(-)
 create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_override.json

diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 5df7f97346a..83651c7972b 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -81,7 +81,7 @@ requirements:
     - numba-cuda >=0.2.0,<0.3.0a0
     - numba >=0.59.1,<0.61.0a0
     - numpy >=1.23,<3.0a0
-    - pyarrow>=14.0.0,<18.0.0a0
+    - pyarrow>=14.0.0,<20.0.0a0
     - libcudf ={{ version }}
     - pylibcudf ={{ version }}
     - {{ pin_compatible('rmm', max_pin='x.x') }}
diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml
index 2ed52955f78..35e9108e9d8 100644
--- a/conda/recipes/pylibcudf/meta.yaml
+++ b/conda/recipes/pylibcudf/meta.yaml
@@ -77,7 +77,7 @@ requirements:
     - typing_extensions >=4.0.0
     - pandas >=2.0,<2.2.4dev0
     - numpy >=1.23,<3.0a0
-    - pyarrow>=14.0.0,<18.0.0a0
+    - pyarrow>=14.0.0,<20.0.0a0
     - {{ pin_compatible('rmm', max_pin='x.x') }}
     - fsspec >=0.6.0
     {% if cuda_major == "11" %}
diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
index 07cbf5150f4..c519fa687c3 100644
--- a/cpp/cmake/thirdparty/get_arrow.cmake
+++ b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -347,7 +347,7 @@ if(NOT DEFINED CUDF_VERSION_Arrow)
   set(CUDF_VERSION_Arrow
       # This version must be kept in sync with the libarrow version pinned for builds in
       # dependencies.yaml.
-      16.1.0
+      19.0.0
       CACHE STRING "The version of Arrow to find (or build)"
   )
 endif()
diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake
index b0c48e04710..6765202cc5e 100644
--- a/cpp/cmake/thirdparty/get_nanoarrow.cmake
+++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -23,11 +23,11 @@ function(find_and_configure_nanoarrow)
   # Currently we need to always build nanoarrow so we don't pickup a previous installed version
   set(CPM_DOWNLOAD_nanoarrow ON)
   rapids_cpm_find(
-    nanoarrow 0.6.0.dev
+    nanoarrow 0.7.0.dev
     GLOBAL_TARGETS nanoarrow
     CPM_ARGS
     GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
-    GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
+    GIT_TAG 4bf5a9322626e95e3717e43de7616c0a256179eb
     GIT_SHALLOW FALSE
     OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" ${_exclude_from_all}
   )
diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_override.json b/cpp/cmake/thirdparty/patches/nanoarrow_override.json
new file mode 100644
index 00000000000..7dd4312a674
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/nanoarrow_override.json
@@ -0,0 +1,18 @@
+
+{
+  "packages" : {
+    "nanoarrow" : {
+      "version" : "0.7.0.dev",
+      "git_url" : "https://github.com/apache/arrow-nanoarrow.git",
+      "git_tag" : "4bf5a9322626e95e3717e43de7616c0a256179eb",
+      "git_shallow" : false,
+      "patches" : [
+        {
+          "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff",
+          "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537",
+          "fixed_in" : ""
+        }
+      ]
+    }
+  }
+}
diff --git a/cpp/src/interop/arrow_utilities.cpp b/cpp/src/interop/arrow_utilities.cpp
index c69ebe12d2c..1e137b8cbed 100644
--- a/cpp/src/interop/arrow_utilities.cpp
+++ b/cpp/src/interop/arrow_utilities.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -63,6 +63,8 @@ data_type arrow_to_cudf_type(ArrowSchemaView const* arrow_view)
         default: CUDF_FAIL("Unsupported duration unit in arrow", cudf::data_type_error);
       }
     }
+    case NANOARROW_TYPE_DECIMAL32: return data_type{type_id::DECIMAL32, -arrow_view->decimal_scale};
+    case NANOARROW_TYPE_DECIMAL64: return data_type{type_id::DECIMAL64, -arrow_view->decimal_scale};
     case NANOARROW_TYPE_DECIMAL128:
       return data_type{type_id::DECIMAL128, -arrow_view->decimal_scale};
     default: CUDF_FAIL("Unsupported type_id conversion to cudf", cudf::data_type_error);
@@ -84,6 +86,8 @@ ArrowType id_to_arrow_type(cudf::type_id id)
     case cudf::type_id::FLOAT32: return NANOARROW_TYPE_FLOAT;
     case cudf::type_id::FLOAT64: return NANOARROW_TYPE_DOUBLE;
     case cudf::type_id::TIMESTAMP_DAYS: return NANOARROW_TYPE_DATE32;
+    case cudf::type_id::DECIMAL32: return NANOARROW_TYPE_DECIMAL32;
+    case cudf::type_id::DECIMAL64: return NANOARROW_TYPE_DECIMAL64;
     case cudf::type_id::DECIMAL128: return NANOARROW_TYPE_DECIMAL128;
     default: CUDF_FAIL("Unsupported type_id conversion to arrow type", cudf::data_type_error);
   }
diff --git a/cpp/src/interop/arrow_utilities.hpp b/cpp/src/interop/arrow_utilities.hpp
index e4bdedf6603..522f8915049 100644
--- a/cpp/src/interop/arrow_utilities.hpp
+++ b/cpp/src/interop/arrow_utilities.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -70,21 +70,5 @@ ArrowType id_to_arrow_storage_type(cudf::type_id id);
  */
 int initialize_array(ArrowArray* arr, ArrowType storage_type, cudf::column_view column);
 
-/**
- * @brief Helper to convert decimal values to 128-bit versions for Arrow compatibility
- *
- * The template parameter should be the underlying type of the data (e.g. int32_t for
- * 32-bit decimal and int64_t for 64-bit decimal).
- *
- * @param input column_view of the data
- * @param stream cuda stream to perform the operations on
- * @param mr memory resource to allocate the returned device_uvector with
- * @return unique_ptr to a device_buffer containing the upcasted data
- */
-template <typename DeviceType>
-std::unique_ptr<rmm::device_buffer> decimals_to_arrow(cudf::column_view input,
-                                                      rmm::cuda_stream_view stream,
-                                                      rmm::device_async_resource_ref mr);
-
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/interop/from_arrow_device.cu b/cpp/src/interop/from_arrow_device.cu
index cb3c4c55a61..29c4dfd35ac 100644
--- a/cpp/src/interop/from_arrow_device.cu
+++ b/cpp/src/interop/from_arrow_device.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -49,9 +49,7 @@ namespace {
 using dispatch_tuple_t = std::tuple<column_view, owned_columns_t>;
 
 struct dispatch_from_arrow_device {
-  template <typename T,
-            CUDF_ENABLE_IF(not is_rep_layout_compatible<T>() &&
-                           !std::is_same_v<T, numeric::decimal128>)>
+  template <typename T, CUDF_ENABLE_IF(not is_rep_layout_compatible<T>() && !is_fixed_point<T>())>
   dispatch_tuple_t operator()(ArrowSchemaView*,
                               ArrowArray const*,
                               data_type,
@@ -62,8 +60,7 @@ struct dispatch_from_arrow_device {
     CUDF_FAIL("Unsupported type in from_arrow_device", cudf::data_type_error);
   }
 
-  template <typename T,
-            CUDF_ENABLE_IF(is_rep_layout_compatible<T>() || std::is_same_v<T, numeric::decimal128>)>
+  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>() || is_fixed_point<T>())>
   dispatch_tuple_t operator()(ArrowSchemaView* schema,
                               ArrowArray const* input,
                               data_type type,
diff --git a/cpp/src/interop/from_arrow_host.cu b/cpp/src/interop/from_arrow_host.cu
index b5d2427e288..ea5487a2960 100644
--- a/cpp/src/interop/from_arrow_host.cu
+++ b/cpp/src/interop/from_arrow_host.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -69,22 +69,19 @@ struct dispatch_copy_from_arrow_host {
     return mask;
   }
 
-  template <typename T,
-            CUDF_ENABLE_IF(not is_rep_layout_compatible<T>() &&
-                           !std::is_same_v<T, numeric::decimal128>)>
+  template <typename T, CUDF_ENABLE_IF(not is_rep_layout_compatible<T>() && !is_fixed_point<T>())>
   std::unique_ptr<column> operator()(ArrowSchemaView*, ArrowArray const*, data_type, bool)
   {
     CUDF_FAIL("Unsupported type in copy_from_arrow_host.");
   }
 
-  template <typename T,
-            CUDF_ENABLE_IF(is_rep_layout_compatible<T>() || std::is_same_v<T, numeric::decimal128>)>
+  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>() || is_fixed_point<T>())>
   std::unique_ptr<column> operator()(ArrowSchemaView* schema,
                                      ArrowArray const* input,
                                      data_type type,
                                      bool skip_mask)
   {
-    using DeviceType = std::conditional_t<std::is_same_v<T, numeric::decimal128>, __int128_t, T>;
+    using DeviceType = device_storage_type_t<T>;
 
     size_type const num_rows   = input->length;
     size_type const offset     = input->offset;
diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu
index fc1b0226a48..17eff1128f6 100644
--- a/cpp/src/interop/to_arrow_device.cu
+++ b/cpp/src/interop/to_arrow_device.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -92,13 +92,15 @@ int set_buffer(std::unique_ptr<T> device_buf, int64_t i, ArrowArray* out)
 }
 
 struct dispatch_to_arrow_device {
-  template <typename T, CUDF_ENABLE_IF(not is_rep_layout_compatible<T>())>
+  template <typename T,
+            CUDF_ENABLE_IF(not is_rep_layout_compatible<T>() and not is_fixed_point<T>())>
   int operator()(cudf::column&&, rmm::cuda_stream_view, rmm::device_async_resource_ref, ArrowArray*)
   {
     CUDF_FAIL("Unsupported type for to_arrow_device", cudf::data_type_error);
   }
 
-  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
+  // cover rep layout compatible and decimal types
+  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>() or is_fixed_point<T>())>
   int operator()(cudf::column&& column,
                  rmm::cuda_stream_view stream,
                  rmm::device_async_resource_ref mr,
@@ -132,64 +134,6 @@ struct dispatch_to_arrow_device {
   }
 };
 
-template <typename DeviceType>
-int construct_decimals(cudf::column_view input,
-                       rmm::cuda_stream_view stream,
-                       rmm::device_async_resource_ref mr,
-                       ArrowArray* out)
-{
-  nanoarrow::UniqueArray tmp;
-  NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, input));
-
-  auto buf = detail::convert_decimals_to_decimal128<DeviceType>(input, stream, mr);
-  // Synchronize stream here to ensure the decimal128 buffer is ready.
-  stream.synchronize();
-  NANOARROW_RETURN_NOT_OK(set_buffer(std::move(buf), fixed_width_data_buffer_idx, tmp.get()));
-
-  ArrowArrayMove(tmp.get(), out);
-  return NANOARROW_OK;
-}
-
-template <>
-int dispatch_to_arrow_device::operator()<numeric::decimal32>(cudf::column&& column,
-                                                             rmm::cuda_stream_view stream,
-                                                             rmm::device_async_resource_ref mr,
-                                                             ArrowArray* out)
-{
-  using DeviceType = int32_t;
-  NANOARROW_RETURN_NOT_OK(construct_decimals<DeviceType>(column.view(), stream, mr, out));
-  auto contents = column.release();
-  NANOARROW_RETURN_NOT_OK(set_null_mask(contents, out));
-  return NANOARROW_OK;
-}
-
-template <>
-int dispatch_to_arrow_device::operator()<numeric::decimal64>(cudf::column&& column,
-                                                             rmm::cuda_stream_view stream,
-                                                             rmm::device_async_resource_ref mr,
-                                                             ArrowArray* out)
-{
-  using DeviceType = int64_t;
-  NANOARROW_RETURN_NOT_OK(construct_decimals<DeviceType>(column.view(), stream, mr, out));
-  auto contents = column.release();
-  NANOARROW_RETURN_NOT_OK(set_null_mask(contents, out));
-  return NANOARROW_OK;
-}
-
-template <>
-int dispatch_to_arrow_device::operator()<numeric::decimal128>(cudf::column&& column,
-                                                              rmm::cuda_stream_view stream,
-                                                              rmm::device_async_resource_ref mr,
-                                                              ArrowArray* out)
-{
-  nanoarrow::UniqueArray tmp;
-  NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, column));
-  auto contents = column.release();
-  NANOARROW_RETURN_NOT_OK(set_contents(contents, tmp.get()));
-  ArrowArrayMove(tmp.get(), out);
-  return NANOARROW_OK;
-}
-
 template <>
 int dispatch_to_arrow_device::operator()<bool>(cudf::column&& column,
                                                rmm::cuda_stream_view stream,
@@ -350,13 +294,14 @@ struct dispatch_to_arrow_device_view {
   rmm::cuda_stream_view stream;
   rmm::device_async_resource_ref mr;
 
-  template <typename T, CUDF_ENABLE_IF(not is_rep_layout_compatible<T>())>
+  template <typename T,
+            CUDF_ENABLE_IF(not is_rep_layout_compatible<T>() and not is_fixed_point<T>())>
   int operator()(ArrowArray*) const
   {
     CUDF_FAIL("Unsupported type for to_arrow_device", cudf::data_type_error);
   }
 
-  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
+  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>() or is_fixed_point<T>())>
   int operator()(ArrowArray* out) const
   {
     nanoarrow::UniqueArray tmp;
@@ -404,37 +349,6 @@ struct dispatch_to_arrow_device_view {
   }
 };
 
-template <>
-int dispatch_to_arrow_device_view::operator()<numeric::decimal32>(ArrowArray* out) const
-{
-  using DeviceType = int32_t;
-  NANOARROW_RETURN_NOT_OK(construct_decimals<DeviceType>(column, stream, mr, out));
-  NANOARROW_RETURN_NOT_OK(set_null_mask(column, out));
-  return NANOARROW_OK;
-}
-
-template <>
-int dispatch_to_arrow_device_view::operator()<numeric::decimal64>(ArrowArray* out) const
-{
-  using DeviceType = int64_t;
-  NANOARROW_RETURN_NOT_OK(construct_decimals<DeviceType>(column, stream, mr, out));
-  NANOARROW_RETURN_NOT_OK(set_null_mask(column, out));
-  return NANOARROW_OK;
-}
-
-template <>
-int dispatch_to_arrow_device_view::operator()<numeric::decimal128>(ArrowArray* out) const
-{
-  nanoarrow::UniqueArray tmp;
-
-  NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, column));
-  NANOARROW_RETURN_NOT_OK(set_null_mask(column, tmp.get()));
-  NANOARROW_RETURN_NOT_OK(set_view_to_buffer(column, tmp.get()));
-
-  ArrowArrayMove(tmp.get(), out);
-  return NANOARROW_OK;
-}
-
 template <>
 int dispatch_to_arrow_device_view::operator()<bool>(ArrowArray* out) const
 {
diff --git a/cpp/src/interop/to_arrow_host.cu b/cpp/src/interop/to_arrow_host.cu
index 8ec0904f1ba..e93fdda0c1a 100644
--- a/cpp/src/interop/to_arrow_host.cu
+++ b/cpp/src/interop/to_arrow_host.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -115,8 +115,7 @@ struct dispatch_to_arrow_host {
     CUDF_FAIL("Unsupported type for to_arrow_host", cudf::data_type_error);
   }
 
-  template <typename T,
-            CUDF_ENABLE_IF(is_rep_layout_compatible<T>() || std::is_same_v<T, numeric::decimal128>)>
+  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>() || is_fixed_point<T>())>
   int operator()(ArrowArray* out) const
   {
     nanoarrow::UniqueArray tmp;
@@ -125,7 +124,7 @@ struct dispatch_to_arrow_host {
     NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), storage_type, column));
 
     NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get())));
-    using DataType = std::conditional_t<std::is_same_v<T, numeric::decimal128>, __int128_t, T>;
+    using DataType = device_storage_type_t<T>;
     NANOARROW_RETURN_NOT_OK(
       populate_data_buffer(device_span<DataType const>(column.data<DataType>(), column.size()),
                            ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx)));
@@ -133,32 +132,6 @@ struct dispatch_to_arrow_host {
     ArrowArrayMove(tmp.get(), out);
     return NANOARROW_OK;
   }
-
-  // convert decimal types from libcudf to arrow where those types are not directly
-  // supported by Arrow. These types must be fit into 128 bits, the smallest
-  // decimal resolution supported by Arrow
-  template <typename T,
-            CUDF_ENABLE_IF(!is_rep_layout_compatible<T>() &&
-                           (std::is_same_v<T, numeric::decimal32> ||
-                            std::is_same_v<T, numeric::decimal64>))>
-  int operator()(ArrowArray* out) const
-  {
-    using DeviceType = std::conditional_t<std::is_same_v<T, numeric::decimal32>, int32_t, int64_t>;
-    nanoarrow::UniqueArray tmp;
-    NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, column));
-
-    NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get())));
-    auto buf = detail::convert_decimals_to_decimal128<DeviceType>(column, stream, mr);
-    // No need to synchronize stream here as populate_data_buffer uses the same stream to copy data
-    // to host.
-    NANOARROW_RETURN_NOT_OK(
-      populate_data_buffer(device_span<__int128_t const>(
-                             reinterpret_cast<const __int128_t*>(buf->data()), column.size()),
-                           ArrowArrayBuffer(tmp.get(), fixed_width_data_buffer_idx)));
-
-    ArrowArrayMove(tmp.get(), out);
-    return NANOARROW_OK;
-  }
 };
 
 int get_column(cudf::column_view column,
diff --git a/cpp/src/interop/to_arrow_schema.cpp b/cpp/src/interop/to_arrow_schema.cpp
index 5dd8d77c261..7ffee53dac8 100644
--- a/cpp/src/interop/to_arrow_schema.cpp
+++ b/cpp/src/interop/to_arrow_schema.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -76,15 +76,10 @@ struct dispatch_to_arrow_type {
 };
 
 template <typename DeviceType>
-int decimals_to_arrow(column_view input, ArrowSchema* out)
+int decimals_to_arrow(column_view input, int32_t precision, ArrowSchema* out)
 {
-  // Arrow doesn't support decimal32/decimal64 currently. decimal128
-  // is the smallest that arrow supports besides float32/float64 so we
-  // upcast to decimal128.
-  return ArrowSchemaSetTypeDecimal(out,
-                                   NANOARROW_TYPE_DECIMAL128,
-                                   cudf::detail::max_precision<DeviceType>(),
-                                   -input.type().scale());
+  return ArrowSchemaSetTypeDecimal(
+    out, id_to_arrow_type(input.type().id()), precision, -input.type().scale());
 }
 
 template <>
@@ -93,7 +88,7 @@ int dispatch_to_arrow_type::operator()<numeric::decimal32>(column_view input,
                                                            ArrowSchema* out)
 {
   using DeviceType = int32_t;
-  return decimals_to_arrow<DeviceType>(input, out);
+  return decimals_to_arrow<DeviceType>(input, cudf::detail::max_precision<DeviceType>(), out);
 }
 
 template <>
@@ -102,7 +97,9 @@ int dispatch_to_arrow_type::operator()<numeric::decimal64>(column_view input,
                                                            ArrowSchema* out)
 {
   using DeviceType = int64_t;
-  return decimals_to_arrow<DeviceType>(input, out);
+  // Arrow decimal 64 maxes at precision of 18, cudf::detail::max_precision<int64_t>() produces 19.
+  // decimal32 has precision 1 - 9, decimal64 has precision 10 - 18, decimal128 is 19 - 38
+  return decimals_to_arrow<DeviceType>(input, cudf::detail::max_precision<DeviceType>() - 1, out);
 }
 
 template <>
@@ -111,7 +108,7 @@ int dispatch_to_arrow_type::operator()<numeric::decimal128>(column_view input,
                                                             ArrowSchema* out)
 {
   using DeviceType = __int128_t;
-  return decimals_to_arrow<DeviceType>(input, out);
+  return decimals_to_arrow<DeviceType>(input, cudf::detail::max_precision<DeviceType>(), out);
 }
 
 template <>
diff --git a/cpp/tests/interop/arrow_utils.hpp b/cpp/tests/interop/arrow_utils.hpp
index e785845394a..6041aa14b60 100644
--- a/cpp/tests/interop/arrow_utils.hpp
+++ b/cpp/tests/interop/arrow_utils.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -238,28 +238,34 @@ std::pair<std::unique_ptr<cudf::table>, std::shared_ptr<arrow::Table>> get_table
   cudf::size_type length = 10000);
 
 template <typename T>
-[[nodiscard]] auto make_decimal128_arrow_array(std::vector<T> const& data,
-                                               std::optional<std::vector<int>> const& validity,
-                                               int32_t scale) -> std::shared_ptr<arrow::Array>
+std::enable_if_t<std::disjunction_v<std::is_same<T, int32_t>,
+                                    std::is_same<T, int64_t>,
+                                    std::is_same<T, __int128_t>>,
+                 std::shared_ptr<arrow::Array>>
+get_decimal_arrow_array(std::vector<T> const& data,
+                        std::optional<std::vector<uint8_t>> const& validity,
+                        int32_t precision,
+                        int32_t scale)
 {
-  auto constexpr BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(T);
-
-  std::shared_ptr<arrow::Array> arr;
-  arrow::Decimal128Builder decimal_builder(arrow::decimal(cudf::detail::max_precision<T>(), -scale),
-                                           arrow::default_memory_pool());
-
-  for (T i = 0; i < static_cast<T>(data.size() / BIT_WIDTH_RATIO); ++i) {
-    if (validity.has_value() and not validity.value()[i]) {
-      CUDF_EXPECTS(decimal_builder.AppendNull().ok(), "Failed to append");
-    } else {
-      CUDF_EXPECTS(
-        decimal_builder.Append(reinterpret_cast<uint8_t const*>(data.data() + BIT_WIDTH_RATIO * i))
-          .ok(),
-        "Failed to append");
-    }
-  }
+  std::shared_ptr<arrow::Buffer> data_buffer;
+  arrow::BufferBuilder buff_builder;
+  CUDF_EXPECTS(buff_builder.Append(data.data(), sizeof(T) * data.size()).ok(),
+               "Failed to append values to buffer builder");
+  CUDF_EXPECTS(buff_builder.Finish(&data_buffer).ok(), "Failed to allocate buffer");
+
+  std::shared_ptr<arrow::Buffer> mask_buffer =
+    !validity.has_value() ? nullptr : arrow::internal::BytesToBits(validity.value()).ValueOrDie();
 
-  CUDF_EXPECTS(decimal_builder.Finish(&arr).ok(), "Failed to build array");
+  std::shared_ptr<arrow::DataType> data_type;
+  if constexpr (std::is_same_v<T, int32_t>) {
+    data_type = arrow::decimal32(precision, -scale);
+  } else if constexpr (std::is_same_v<T, int64_t>) {
+    data_type = arrow::decimal64(precision, -scale);
+  } else {
+    data_type = arrow::decimal128(precision, -scale);
+  }
 
-  return arr;
+  auto array_data = std::make_shared<arrow::ArrayData>(
+    data_type, data.size(), std::vector<std::shared_ptr<arrow::Buffer>>{mask_buffer, data_buffer});
+  return arrow::MakeArray(array_data);
 }
diff --git a/cpp/tests/interop/from_arrow_device_test.cpp b/cpp/tests/interop/from_arrow_device_test.cpp
index 1ddc33e749a..3f332c9f46b 100644
--- a/cpp/tests/interop/from_arrow_device_test.cpp
+++ b/cpp/tests/interop/from_arrow_device_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,7 +40,12 @@ struct FromArrowDeviceTest : public cudf::test::BaseFixture {};
 template <typename T>
 struct FromArrowDeviceTestDurationsTest : public cudf::test::BaseFixture {};
 
+template <typename T>
+struct FromArrowDeviceTestDecimalsTest : public cudf::test::BaseFixture {};
+
 TYPED_TEST_SUITE(FromArrowDeviceTestDurationsTest, cudf::test::DurationTypes);
+using FixedPointTypes = cudf::test::Types<int32_t, int64_t, __int128_t>;
+TYPED_TEST_SUITE(FromArrowDeviceTestDecimalsTest, FixedPointTypes);
 
 TEST_F(FromArrowDeviceTest, FailConditions)
 {
@@ -568,23 +573,24 @@ INSTANTIATE_TEST_CASE_P(FromArrowDeviceTest,
 template <typename T>
 using fp_wrapper = cudf::test::fixed_point_column_wrapper<T>;
 
-TEST_F(FromArrowDeviceTest, FixedPoint128Table)
+TYPED_TEST(FromArrowDeviceTestDecimalsTest, FixedPointTable)
 {
+  using T = TypeParam;
   using namespace numeric;
 
+  auto const precision = get_decimal_precision<T>();
+
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto const data     = std::vector<__int128_t>{1, 2, 3, 4, 5, 6};
-    auto const col      = fp_wrapper<__int128_t>(data.cbegin(), data.cend(), scale_type{scale});
+    auto const data     = std::vector<T>{1, 2, 3, 4, 5, 6};
+    auto const col      = fp_wrapper<T>(data.cbegin(), data.cend(), scale_type{scale});
     auto const expected = cudf::table_view({col});
 
     nanoarrow::UniqueSchema input_schema;
     ArrowSchemaInit(input_schema.get());
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1));
     ArrowSchemaInit(input_schema->children[0]);
-    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(input_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
-                                                     cudf::detail::max_precision<__int128_t>(),
-                                                     -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(
+      input_schema->children[0], nanoarrow_decimal_type<T>::type, precision, -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a"));
 
     nanoarrow::UniqueArray input_array;
@@ -592,7 +598,7 @@ TEST_F(FromArrowDeviceTest, FixedPoint128Table)
       ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr));
     input_array->length = expected.num_rows();
 
-    populate_from_col<__int128_t>(input_array->children[0], expected.column(0));
+    populate_from_col<T>(input_array->children[0], expected.column(0));
     NANOARROW_THROW_NOT_OK(
       ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr));
 
@@ -613,25 +619,26 @@ TEST_F(FromArrowDeviceTest, FixedPoint128Table)
   }
 }
 
-TEST_F(FromArrowDeviceTest, FixedPoint128TableLarge)
+TYPED_TEST(FromArrowDeviceTestDecimalsTest, FixedPointTableLarge)
 {
+  using T = TypeParam;
   using namespace numeric;
+
+  auto const precision        = get_decimal_precision<T>();
   auto constexpr NUM_ELEMENTS = 1000;
 
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
     auto iota           = thrust::make_counting_iterator(1);
-    auto const data     = std::vector<__int128_t>(iota, iota + NUM_ELEMENTS);
-    auto const col      = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, scale_type{scale});
+    auto const data     = std::vector<T>(iota, iota + NUM_ELEMENTS);
+    auto const col      = fp_wrapper<T>(iota, iota + NUM_ELEMENTS, scale_type{scale});
     auto const expected = cudf::table_view({col});
 
     nanoarrow::UniqueSchema input_schema;
     ArrowSchemaInit(input_schema.get());
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1));
     ArrowSchemaInit(input_schema->children[0]);
-    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(input_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
-                                                     cudf::detail::max_precision<__int128_t>(),
-                                                     -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(
+      input_schema->children[0], nanoarrow_decimal_type<T>::type, precision, -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a"));
 
     nanoarrow::UniqueArray input_array;
@@ -639,7 +646,7 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableLarge)
       ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr));
     input_array->length = expected.num_rows();
 
-    populate_from_col<__int128_t>(input_array->children[0], expected.column(0));
+    populate_from_col<T>(input_array->children[0], expected.column(0));
     NANOARROW_THROW_NOT_OK(
       ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr));
 
@@ -660,25 +667,26 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableLarge)
   }
 }
 
-TEST_F(FromArrowDeviceTest, FixedPoint128TableNulls)
+TYPED_TEST(FromArrowDeviceTestDecimalsTest, FixedPointTableNulls)
 {
+  using T = TypeParam;
   using namespace numeric;
 
+  auto const precision = get_decimal_precision<T>();
+
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto const data     = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0};
+    auto const data     = std::vector<T>{1, 2, 3, 4, 5, 6, 0, 0};
     auto const validity = std::vector<int32_t>{1, 1, 1, 1, 1, 1, 0, 0};
     auto const col =
-      fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
+      fp_wrapper<T>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
     auto const expected = cudf::table_view({col});
 
     nanoarrow::UniqueSchema input_schema;
     ArrowSchemaInit(input_schema.get());
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1));
     ArrowSchemaInit(input_schema->children[0]);
-    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(input_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
-                                                     cudf::detail::max_precision<__int128_t>(),
-                                                     -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(
+      input_schema->children[0], nanoarrow_decimal_type<T>::type, precision, -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a"));
 
     nanoarrow::UniqueArray input_array;
@@ -686,7 +694,7 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableNulls)
       ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr));
     input_array->length = expected.num_rows();
 
-    populate_from_col<__int128_t>(input_array->children[0], expected.column(0));
+    populate_from_col<T>(input_array->children[0], expected.column(0));
     NANOARROW_THROW_NOT_OK(
       ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr));
 
@@ -707,27 +715,28 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableNulls)
   }
 }
 
-TEST_F(FromArrowDeviceTest, FixedPoint128TableNullsLarge)
+TYPED_TEST(FromArrowDeviceTestDecimalsTest, FixedPointTableNullsLarge)
 {
+  using T = TypeParam;
   using namespace numeric;
+
+  auto const precision        = get_decimal_precision<T>();
   auto constexpr NUM_ELEMENTS = 1000;
 
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto every_other = [](auto i) { return i % 2 ? 0 : 1; };
-    auto validity    = cudf::detail::make_counting_transform_iterator(0, every_other);
-    auto iota        = thrust::make_counting_iterator(1);
-    auto const data  = std::vector<__int128_t>(iota, iota + NUM_ELEMENTS);
-    auto const col = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, validity, scale_type{scale});
+    auto every_other    = [](auto i) { return i % 2 ? 0 : 1; };
+    auto validity       = cudf::detail::make_counting_transform_iterator(0, every_other);
+    auto iota           = thrust::make_counting_iterator(1);
+    auto const data     = std::vector<T>(iota, iota + NUM_ELEMENTS);
+    auto const col      = fp_wrapper<T>(iota, iota + NUM_ELEMENTS, validity, scale_type{scale});
     auto const expected = cudf::table_view({col});
 
     nanoarrow::UniqueSchema input_schema;
     ArrowSchemaInit(input_schema.get());
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1));
     ArrowSchemaInit(input_schema->children[0]);
-    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(input_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
-                                                     cudf::detail::max_precision<__int128_t>(),
-                                                     -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(
+      input_schema->children[0], nanoarrow_decimal_type<T>::type, precision, -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a"));
 
     nanoarrow::UniqueArray input_array;
@@ -735,7 +744,7 @@ TEST_F(FromArrowDeviceTest, FixedPoint128TableNullsLarge)
       ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr));
     input_array->length = expected.num_rows();
 
-    populate_from_col<__int128_t>(input_array->children[0], expected.column(0));
+    populate_from_col<T>(input_array->children[0], expected.column(0));
     NANOARROW_THROW_NOT_OK(
       ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr));
 
diff --git a/cpp/tests/interop/from_arrow_host_test.cpp b/cpp/tests/interop/from_arrow_host_test.cpp
index 1ab11b374b6..72c673eff11 100644
--- a/cpp/tests/interop/from_arrow_host_test.cpp
+++ b/cpp/tests/interop/from_arrow_host_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_view.hpp>
 #include <cudf/copying.hpp>
+#include <cudf/detail/interop.hpp>
 #include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/dictionary/dictionary_factories.hpp>
 #include <cudf/interop.hpp>
@@ -95,7 +96,12 @@ struct FromArrowHostDeviceTest : public cudf::test::BaseFixture {};
 template <typename T>
 struct FromArrowHostDeviceTestDurationsTest : public cudf::test::BaseFixture {};
 
+template <typename T>
+struct FromArrowHostDeviceTestDecimalsTest : public cudf::test::BaseFixture {};
+
 TYPED_TEST_SUITE(FromArrowHostDeviceTestDurationsTest, cudf::test::DurationTypes);
+using FixedPointTypes = cudf::test::Types<int32_t, int64_t, __int128_t>;
+TYPED_TEST_SUITE(FromArrowHostDeviceTestDecimalsTest, FixedPointTypes);
 
 TEST_F(FromArrowHostDeviceTest, EmptyTable)
 {
@@ -215,6 +221,220 @@ TYPED_TEST(FromArrowHostDeviceTestDurationsTest, DurationTable)
   CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct);
 }
 
+template <typename T>
+using fp_wrapper = cudf::test::fixed_point_column_wrapper<T>;
+
+TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTable)
+{
+  using T = TypeParam;
+  using namespace numeric;
+
+  auto const precision = get_decimal_precision<T>();
+  for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
+    auto const data     = std::vector<T>{1, 2, 3, 4, 5, 6};
+    auto const col      = fp_wrapper<T>(data.cbegin(), data.cend(), scale_type{scale});
+    auto const expected = cudf::table_view({col});
+
+    nanoarrow::UniqueSchema input_schema;
+    ArrowSchemaInit(input_schema.get());
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1));
+    ArrowSchemaInit(input_schema->children[0]);
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(
+      input_schema->children[0], nanoarrow_decimal_type<T>::type, precision, -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a"));
+
+    nanoarrow::UniqueArray input_array;
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr));
+    input_array->length     = expected.num_rows();
+    input_array->null_count = 0;
+
+    auto arr = get_nanoarrow_array<T>(data);
+    arr.move(input_array->children[0]);
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr));
+
+    ArrowDeviceArray input;
+    memcpy(&input.array, input_array.get(), sizeof(ArrowArray));
+    input.device_id   = -1;
+    input.device_type = ARROW_DEVICE_CPU;
+
+    // converting arrow host memory to cudf table gives us the expected table
+    auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view());
+
+    // converting to a cudf table with a single struct column gives us the expected
+    // result column
+    auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input);
+    EXPECT_EQ(got_cudf_col->type(), cudf::data_type{cudf::type_id::STRUCT});
+    auto got_cudf_col_view = got_cudf_col->view();
+    cudf::table_view from_struct{std::vector<cudf::column_view>(got_cudf_col_view.child_begin(),
+                                                                got_cudf_col_view.child_end())};
+    CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct);
+  }
+}
+
+TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTableLarge)
+{
+  using T = TypeParam;
+  using namespace numeric;
+
+  auto const precision        = get_decimal_precision<T>();
+  auto constexpr NUM_ELEMENTS = 1000;
+
+  for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
+    auto iota           = thrust::make_counting_iterator(1);
+    auto const data     = std::vector<T>(iota, iota + NUM_ELEMENTS);
+    auto const col      = fp_wrapper<T>(iota, iota + NUM_ELEMENTS, scale_type{scale});
+    auto const expected = cudf::table_view({col});
+
+    nanoarrow::UniqueSchema input_schema;
+    ArrowSchemaInit(input_schema.get());
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1));
+    ArrowSchemaInit(input_schema->children[0]);
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(
+      input_schema->children[0], nanoarrow_decimal_type<T>::type, precision, -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a"));
+
+    nanoarrow::UniqueArray input_array;
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr));
+    input_array->length     = expected.num_rows();
+    input_array->null_count = 0;
+
+    auto arr = get_nanoarrow_array<T>(data);
+    arr.move(input_array->children[0]);
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr));
+
+    ArrowDeviceArray input;
+    memcpy(&input.array, input_array.get(), sizeof(ArrowArray));
+    input.device_id   = -1;
+    input.device_type = ARROW_DEVICE_CPU;
+
+    // converting arrow host memory to cudf table gives us the expected table
+    auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view());
+
+    // converting to a cudf table with a single struct column gives us the expected
+    // result column
+    auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input);
+    EXPECT_EQ(got_cudf_col->type(), cudf::data_type{cudf::type_id::STRUCT});
+    auto got_cudf_col_view = got_cudf_col->view();
+    cudf::table_view from_struct{std::vector<cudf::column_view>(got_cudf_col_view.child_begin(),
+                                                                got_cudf_col_view.child_end())};
+    CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct);
+  }
+}
+
+TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTableNulls)
+{
+  using T = TypeParam;
+  using namespace numeric;
+
+  auto const precision = get_decimal_precision<T>();
+  for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
+    auto const data     = std::vector<T>{1, 2, 3, 4, 5, 6, 0, 0};  // last two rows are null
+    auto const validity = std::vector<uint8_t>{1, 1, 1, 1, 1, 1, 0, 0};
+    auto const col = fp_wrapper<T>(data.begin(), data.end(), validity.begin(), scale_type{scale});
+    auto const expected = cudf::table_view({col});
+
+    nanoarrow::UniqueSchema input_schema;
+    ArrowSchemaInit(input_schema.get());
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1));
+    ArrowSchemaInit(input_schema->children[0]);
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(
+      input_schema->children[0], nanoarrow_decimal_type<T>::type, precision, -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a"));
+
+    nanoarrow::UniqueArray input_array;
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr));
+    input_array->length = expected.num_rows();
+
+    auto arr = get_nanoarrow_array<T>(data, validity);
+    arr.move(input_array->children[0]);
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr));
+
+    ArrowDeviceArray input;
+    memcpy(&input.array, input_array.get(), sizeof(ArrowArray));
+    input.device_id   = -1;
+    input.device_type = ARROW_DEVICE_CPU;
+
+    // converting arrow host memory to cudf table gives us the expected table
+    auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view());
+
+    // converting to a cudf table with a single struct column gives us the expected
+    // result column
+    auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input);
+    EXPECT_EQ(got_cudf_col->type(), cudf::data_type{cudf::type_id::STRUCT});
+    auto got_cudf_col_view = got_cudf_col->view();
+    cudf::table_view from_struct{std::vector<cudf::column_view>(got_cudf_col_view.child_begin(),
+                                                                got_cudf_col_view.child_end())};
+    CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct);
+  }
+}
+
+TYPED_TEST(FromArrowHostDeviceTestDecimalsTest, FixedPointTableLargeNulls)
+{
+  using T = TypeParam;
+  using namespace numeric;
+
+  auto const precision        = get_decimal_precision<T>();
+  auto constexpr NUM_ELEMENTS = 1000;
+
+  for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
+    auto every_other = [](auto i) { return i % 2 ? 0 : 1; };
+    auto validity    = cudf::detail::make_counting_transform_iterator(0, every_other);
+    std::vector<uint8_t> validity_vec(validity, validity + NUM_ELEMENTS);
+    auto iota           = thrust::make_counting_iterator(1);
+    auto const data     = std::vector<T>(iota, iota + NUM_ELEMENTS);
+    auto const col      = fp_wrapper<T>(iota,
+                                   iota + NUM_ELEMENTS,
+                                   cudf::detail::make_counting_transform_iterator(0, every_other),
+                                   scale_type{scale});
+    auto const expected = cudf::table_view({col});
+
+    nanoarrow::UniqueSchema input_schema;
+    ArrowSchemaInit(input_schema.get());
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(input_schema.get(), 1));
+    ArrowSchemaInit(input_schema->children[0]);
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(
+      input_schema->children[0], nanoarrow_decimal_type<T>::type, precision, -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(input_schema->children[0], "a"));
+
+    nanoarrow::UniqueArray input_array;
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayInitFromSchema(input_array.get(), input_schema.get(), nullptr));
+    input_array->length = expected.num_rows();
+
+    auto arr = get_nanoarrow_array<T>(data, validity_vec);
+    arr.move(input_array->children[0]);
+    NANOARROW_THROW_NOT_OK(
+      ArrowArrayFinishBuilding(input_array.get(), NANOARROW_VALIDATION_LEVEL_MINIMAL, nullptr));
+
+    ArrowDeviceArray input;
+    memcpy(&input.array, input_array.get(), sizeof(ArrowArray));
+    input.device_id   = -1;
+    input.device_type = ARROW_DEVICE_CPU;
+
+    // converting arrow host memory to cudf table gives us the expected table
+    auto got_cudf_table = cudf::from_arrow_host(input_schema.get(), &input);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view());
+
+    // converting to a cudf table with a single struct column gives us the expected
+    // result column
+    auto got_cudf_col = cudf::from_arrow_host_column(input_schema.get(), &input);
+    EXPECT_EQ(got_cudf_col->type(), cudf::data_type{cudf::type_id::STRUCT});
+    auto got_cudf_col_view = got_cudf_col->view();
+    cudf::table_view from_struct{std::vector<cudf::column_view>(got_cudf_col_view.child_begin(),
+                                                                got_cudf_col_view.child_end())};
+    CUDF_TEST_EXPECT_TABLES_EQUAL(got_cudf_table->view(), from_struct);
+  }
+}
+
 TEST_F(FromArrowHostDeviceTest, NestedList)
 {
   auto valids =
diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp
index 62e38cbfd45..31a3230e084 100644
--- a/cpp/tests/interop/from_arrow_test.cpp
+++ b/cpp/tests/interop/from_arrow_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
  */
 
 #include <tests/interop/arrow_utils.hpp>
+#include <tests/interop/nanoarrow_utils.hpp>
 
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
@@ -86,6 +87,9 @@ struct FromArrowTest : public cudf::test::BaseFixture {};
 template <typename T>
 struct FromArrowTestDurationsTest : public cudf::test::BaseFixture {};
 
+template <typename T>
+struct FromArrowTestDecimalsTest : public cudf::test::BaseFixture {};
+
 std::optional<std::unique_ptr<cudf::table>> export_table(std::shared_ptr<arrow::Table> arrow_table)
 {
   ArrowSchema schema;
@@ -126,6 +130,8 @@ std::optional<std::unique_ptr<cudf::scalar>> export_scalar(
 }
 
 TYPED_TEST_SUITE(FromArrowTestDurationsTest, cudf::test::DurationTypes);
+using FixedPointTypes = cudf::test::Types<int32_t, int64_t, __int128_t>;
+TYPED_TEST_SUITE(FromArrowTestDecimalsTest, FixedPointTypes);
 
 TEST_F(FromArrowTest, EmptyTable)
 {
@@ -210,7 +216,7 @@ TEST_F(FromArrowTest, NestedList)
 
   auto list_arr = get_arrow_list_array<int64_t>({6, 7, 8, 9}, {0, 1, 4}, {1, 0, 1, 1});
   std::vector<int32_t> offset{0, 0, 2};
-  auto mask_buffer     = arrow::internal::BytesToBits({0, 1}).ValueOrDie();
+  auto mask_buffer     = arrow::internal::BytesToBits(std::vector<uint8_t>({0, 1})).ValueOrDie();
   auto nested_list_arr = std::make_shared<arrow::ListArray>(arrow::list(list(arrow::int64())),
                                                             offset.size() - 1,
                                                             arrow::Buffer::Wrap(offset),
@@ -288,9 +294,10 @@ TEST_F(FromArrowTest, StructColumn)
   auto fields2 = std::vector<std::shared_ptr<arrow::Field>>{
     std::make_shared<arrow::Field>("string2", str2_array->type(), str2_array->null_count() > 0),
     std::make_shared<arrow::Field>("integral2", int2_array->type(), int2_array->null_count() > 0)};
-  std::shared_ptr<arrow::Buffer> mask_buffer = arrow::internal::BytesToBits({1, 1, 0}).ValueOrDie();
-  auto dtype2                                = std::make_shared<arrow::StructType>(fields2);
-  auto struct_array2                         = std::make_shared<arrow::StructArray>(
+  std::shared_ptr<arrow::Buffer> mask_buffer =
+    arrow::internal::BytesToBits(std::vector<uint8_t>({1, 1, 0})).ValueOrDie();
+  auto dtype2        = std::make_shared<arrow::StructType>(fields2);
+  auto struct_array2 = std::make_shared<arrow::StructArray>(
     dtype2, static_cast<int64_t>(expected_cudf_table.num_rows()), child_arrays2, mask_buffer);
 
   std::vector<std::shared_ptr<arrow::Array>> child_arrays(
@@ -444,16 +451,18 @@ TEST_P(FromArrowTestSlice, SliceTest)
 template <typename T>
 using fp_wrapper = cudf::test::fixed_point_column_wrapper<T>;
 
-TEST_F(FromArrowTest, FixedPoint128Table)
+TYPED_TEST(FromArrowTestDecimalsTest, FixedPointTable)
 {
+  using T = TypeParam;
   using namespace numeric;
 
+  auto const precision = get_decimal_precision<T>();
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto const data     = std::vector<__int128_t>{1, 2, 3, 4, 5, 6};
-    auto const col      = fp_wrapper<__int128_t>(data.cbegin(), data.cend(), scale_type{scale});
+    auto const data     = std::vector<T>{1, 2, 3, 4, 5, 6};
+    auto const col      = fp_wrapper<T>(data.cbegin(), data.cend(), scale_type{scale});
     auto const expected = cudf::table_view({col});
 
-    auto const arr = make_decimal128_arrow_array(data, std::nullopt, scale);
+    auto const arr = get_decimal_arrow_array(data, std::nullopt, precision, scale);
 
     auto const field         = arrow::field("a", arr->type());
     auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -467,18 +476,21 @@ TEST_F(FromArrowTest, FixedPoint128Table)
   }
 }
 
-TEST_F(FromArrowTest, FixedPoint128TableLarge)
+TYPED_TEST(FromArrowTestDecimalsTest, FixedPointTableLarge)
 {
+  using T = TypeParam;
   using namespace numeric;
+
+  auto const precision        = get_decimal_precision<T>();
   auto constexpr NUM_ELEMENTS = 1000;
 
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
     auto iota           = thrust::make_counting_iterator(1);
-    auto const data     = std::vector<__int128_t>(iota, iota + NUM_ELEMENTS);
-    auto const col      = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, scale_type{scale});
+    auto const data     = std::vector<T>(iota, iota + NUM_ELEMENTS);
+    auto const col      = fp_wrapper<T>(iota, iota + NUM_ELEMENTS, scale_type{scale});
     auto const expected = cudf::table_view({col});
 
-    auto const arr = make_decimal128_arrow_array(data, std::nullopt, scale);
+    auto const arr = get_decimal_arrow_array(data, std::nullopt, precision, scale);
 
     auto const field         = arrow::field("a", arr->type());
     auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -492,19 +504,21 @@ TEST_F(FromArrowTest, FixedPoint128TableLarge)
   }
 }
 
-TEST_F(FromArrowTest, FixedPoint128TableNulls)
+TYPED_TEST(FromArrowTestDecimalsTest, FixedPointTableNulls)
 {
+  using T = TypeParam;
   using namespace numeric;
 
+  auto const precision = get_decimal_precision<T>();
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto const data     = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0};
-    auto const validity = std::vector<int32_t>{1, 1, 1, 1, 1, 1, 0, 0};
-    auto const col      = fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0},
-                                            {true, true, true, true, true, true, false, false},
-                                            scale_type{scale});
+    auto const data     = std::vector<T>{1, 2, 3, 4, 5, 6, 0, 0};
+    auto const validity = std::vector<uint8_t>{1, 1, 1, 1, 1, 1, 0, 0};
+    auto const col      = fp_wrapper<T>({1, 2, 3, 4, 5, 6, 0, 0},
+                                   {true, true, true, true, true, true, false, false},
+                                   scale_type{scale});
     auto const expected = cudf::table_view({col});
 
-    auto const arr = make_decimal128_arrow_array(data, validity, scale);
+    auto const arr = get_decimal_arrow_array(data, validity, precision, scale);
 
     auto const field         = arrow::field("a", arr->type());
     auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -518,21 +532,24 @@ TEST_F(FromArrowTest, FixedPoint128TableNulls)
   }
 }
 
-TEST_F(FromArrowTest, FixedPoint128TableNullsLarge)
+TYPED_TEST(FromArrowTestDecimalsTest, FixedPointTableNullsLarge)
 {
+  using T = TypeParam;
   using namespace numeric;
+
+  auto const precision        = get_decimal_precision<T>();
   auto constexpr NUM_ELEMENTS = 1000;
 
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto every_other = [](auto i) { return i % 2 ? 0 : 1; };
-    auto validity    = cudf::detail::make_counting_transform_iterator(0, every_other);
-    auto iota        = thrust::make_counting_iterator(1);
-    auto const data  = std::vector<__int128_t>(iota, iota + NUM_ELEMENTS);
-    auto const col = fp_wrapper<__int128_t>(iota, iota + NUM_ELEMENTS, validity, scale_type{scale});
+    auto every_other    = [](auto i) { return i % 2 ? 0 : 1; };
+    auto validity       = cudf::detail::make_counting_transform_iterator(0, every_other);
+    auto iota           = thrust::make_counting_iterator(1);
+    auto const data     = std::vector<T>(iota, iota + NUM_ELEMENTS);
+    auto const col      = fp_wrapper<T>(iota, iota + NUM_ELEMENTS, validity, scale_type{scale});
     auto const expected = cudf::table_view({col});
 
-    auto const arr = make_decimal128_arrow_array(
-      data, std::vector<int32_t>(validity, validity + NUM_ELEMENTS), scale);
+    auto const arr = get_decimal_arrow_array(
+      data, std::vector<uint8_t>(validity, validity + NUM_ELEMENTS), precision, scale);
 
     auto const field         = arrow::field("a", arr->type());
     auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -579,24 +596,33 @@ TYPED_TEST(FromArrowNumericScalarTest, Basic)
 
 struct FromArrowDecimalScalarTest : public cudf::test::BaseFixture {};
 
-// Only testing Decimal128 because that's the only size cudf and arrow have in common.
-TEST_F(FromArrowDecimalScalarTest, Basic)
+template <typename ScalarType, typename DecimalType>
+void check_decimal_scalar(const int value, ScalarType const& arrow_scalar)
 {
-  auto const value{42};
-  auto const precision{8};
   auto const scale{4};
-  auto arrow_scalar      = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale));
   auto const cudf_scalar = export_scalar(arrow_scalar);
   ASSERT_TRUE(cudf_scalar.has_value());
 
-  // Arrow offers a minimum of 128 bits for the Decimal type.
   auto const cudf_decimal_scalar =
-    dynamic_cast<cudf::fixed_point_scalar<numeric::decimal128>*>(cudf_scalar.value().get());
-  EXPECT_EQ(cudf_decimal_scalar->type(),
-            cudf::data_type(cudf::type_to_id<numeric::decimal128>(), scale));
+    dynamic_cast<cudf::fixed_point_scalar<DecimalType>*>(cudf_scalar.value().get());
+  EXPECT_EQ(cudf_decimal_scalar->type(), cudf::data_type(cudf::type_to_id<DecimalType>(), scale));
   EXPECT_EQ(cudf_decimal_scalar->value(), value);
 }
 
+TEST_F(FromArrowDecimalScalarTest, Basic)
+{
+  auto const value{42};
+  auto const precision{8};
+  auto const scale{4};
+  auto arrow_scalar32  = arrow::Decimal32Scalar(value, arrow::decimal32(precision, -scale));
+  auto arrow_scalar64  = arrow::Decimal64Scalar(value, arrow::decimal64(precision, -scale));
+  auto arrow_scalar128 = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale));
+
+  check_decimal_scalar<arrow::Decimal32Scalar, numeric::decimal32>(value, arrow_scalar32);
+  check_decimal_scalar<arrow::Decimal64Scalar, numeric::decimal64>(value, arrow_scalar64);
+  check_decimal_scalar<arrow::Decimal128Scalar, numeric::decimal128>(value, arrow_scalar128);
+}
+
 struct FromArrowStringScalarTest : public cudf::test::BaseFixture {};
 
 TEST_F(FromArrowStringScalarTest, Basic)
diff --git a/cpp/tests/interop/nanoarrow_utils.hpp b/cpp/tests/interop/nanoarrow_utils.hpp
index b7b8202a3c2..a1211a16e10 100644
--- a/cpp/tests/interop/nanoarrow_utils.hpp
+++ b/cpp/tests/interop/nanoarrow_utils.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cudf/column/column_view.hpp>
+#include <cudf/detail/interop.hpp>
 #include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/lists/lists_column_view.hpp>
 #include <cudf/null_mask.hpp>
@@ -217,6 +218,24 @@ DEFINE_NANOARROW_STORAGE(__int128_t, DECIMAL128);
 
 #undef DEFINE_NANOARROW_STORAGE
 
+template <typename T>
+struct nanoarrow_decimal_type {};
+
+template <>
+struct nanoarrow_decimal_type<int32_t> {
+  static constexpr ArrowType type = NANOARROW_TYPE_DECIMAL32;
+};
+
+template <>
+struct nanoarrow_decimal_type<int64_t> {
+  static constexpr ArrowType type = NANOARROW_TYPE_DECIMAL64;
+};
+
+template <>
+struct nanoarrow_decimal_type<__int128_t> {
+  static constexpr ArrowType type = NANOARROW_TYPE_DECIMAL128;
+};
+
 template <typename T>
 std::enable_if_t<cudf::is_fixed_width<T>() and !std::is_same_v<T, bool>, nanoarrow::UniqueArray>
 get_nanoarrow_array(std::vector<T> const& data, std::vector<uint8_t> const& mask = {})
@@ -384,3 +403,16 @@ std::tuple<std::unique_ptr<cudf::table>, nanoarrow::UniqueSchema, nanoarrow::Uni
 get_nanoarrow_host_tables(cudf::size_type length);
 
 void slice_host_nanoarrow(ArrowArray* arr, int64_t start, int64_t end);
+
+template <typename T>
+std::enable_if_t<std::disjunction_v<std::is_same<T, int32_t>,
+                                    std::is_same<T, int64_t>,
+                                    std::is_same<T, __int128_t>>,
+                 std::size_t>
+get_decimal_precision()
+{
+  if constexpr (std::is_same_v<T, int64_t>)
+    return 18;
+  else
+    return cudf::detail::max_precision<T>();
+}
diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp
index 112b3e1d8e2..7fcb1478196 100644
--- a/cpp/tests/interop/to_arrow_device_test.cpp
+++ b/cpp/tests/interop/to_arrow_device_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -708,9 +708,7 @@ TEST_F(ToArrowDeviceTest, FixedPoint32Table)
   using namespace numeric;
 
   for (auto const scale : {6, 4, 2, 0, -1, -3, -5}) {
-    auto const expect_data =
-      std::vector<int32_t>{-1000, -1, -1, -1, 2400, 0, 0, 0, -3456, -1, -1, -1,
-                           4650,  0,  0,  0,  5154, 0, 0, 0, 6800,  0,  0,  0};
+    auto const expect_data = std::vector<int32_t>{-1000, 2400, -3456, 4650, 5154, 6800};
     auto col = fp_wrapper<int32_t>({-1000, 2400, -3456, 4650, 5154, 6800}, scale_type{scale});
     std::vector<std::unique_ptr<cudf::column>> table_cols;
     table_cols.emplace_back(col.release());
@@ -721,7 +719,7 @@ TEST_F(ToArrowDeviceTest, FixedPoint32Table)
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
     ArrowSchemaInit(expected_schema->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
+                                                     NANOARROW_TYPE_DECIMAL32,
                                                      cudf::detail::max_precision<int32_t>(),
                                                      -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
@@ -731,36 +729,12 @@ TEST_F(ToArrowDeviceTest, FixedPoint32Table)
       cudf::to_arrow_schema(input.view(), std::vector<cudf::column_metadata>{{"a"}});
     compare_schemas(expected_schema.get(), got_arrow_schema.get());
 
-    auto result_dev_data = std::make_unique<rmm::device_uvector<int32_t>>(
-      expect_data.size(), cudf::get_default_stream());
-    cudaMemcpy(result_dev_data->data(),
-               expect_data.data(),
-               sizeof(int32_t) * expect_data.size(),
-               cudaMemcpyHostToDevice);
-
-    cudf::get_default_stream().synchronize();
     nanoarrow::UniqueArray expected_array;
     NANOARROW_THROW_NOT_OK(
       ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
     expected_array->length = input.num_rows();
 
-    expected_array->children[0]->length = input.num_rows();
-    NANOARROW_THROW_NOT_OK(
-      ArrowBufferSetAllocator(ArrowArrayBuffer(expected_array->children[0], 0), noop_alloc));
-    ArrowArrayValidityBitmap(expected_array->children[0])->buffer.data =
-      const_cast<uint8_t*>(reinterpret_cast<uint8_t const*>(input.view().column(0).null_mask()));
-
-    auto data_ptr = reinterpret_cast<uint8_t*>(result_dev_data->data());
-    NANOARROW_THROW_NOT_OK(ArrowBufferSetAllocator(
-      ArrowArrayBuffer(expected_array->children[0], 1),
-      ArrowBufferDeallocator(
-        [](ArrowBufferAllocator* alloc, uint8_t*, int64_t) {
-          auto buf =
-            reinterpret_cast<std::unique_ptr<rmm::device_uvector<int32_t>>*>(alloc->private_data);
-          delete buf;
-        },
-        new std::unique_ptr<rmm::device_uvector<int32_t>>(std::move(result_dev_data)))));
-    ArrowArrayBuffer(expected_array->children[0], 1)->data = data_ptr;
+    populate_from_col<int32_t>(expected_array->children[0], input.view().column(0));
     NANOARROW_THROW_NOT_OK(
       ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr));
 
@@ -795,10 +769,8 @@ TEST_F(ToArrowDeviceTest, FixedPoint64Table)
     ArrowSchemaInit(expected_schema.get());
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
     ArrowSchemaInit(expected_schema->children[0]);
-    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
-                                                     cudf::detail::max_precision<int64_t>(),
-                                                     -scale));
+    NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(
+      expected_schema->children[0], NANOARROW_TYPE_DECIMAL64, 18, -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
     expected_schema->children[0]->flags = 0;
 
@@ -806,36 +778,12 @@ TEST_F(ToArrowDeviceTest, FixedPoint64Table)
       cudf::to_arrow_schema(input.view(), std::vector<cudf::column_metadata>{{"a"}});
     compare_schemas(expected_schema.get(), got_arrow_schema.get());
 
-    auto result_dev_data = std::make_unique<rmm::device_uvector<int64_t>>(
-      expect_data.size(), cudf::get_default_stream());
-    cudaMemcpy(result_dev_data->data(),
-               expect_data.data(),
-               sizeof(int64_t) * expect_data.size(),
-               cudaMemcpyHostToDevice);
-
-    cudf::get_default_stream().synchronize();
     nanoarrow::UniqueArray expected_array;
     NANOARROW_THROW_NOT_OK(
       ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
     expected_array->length = input.num_rows();
 
-    expected_array->children[0]->length = input.num_rows();
-    NANOARROW_THROW_NOT_OK(
-      ArrowBufferSetAllocator(ArrowArrayBuffer(expected_array->children[0], 0), noop_alloc));
-    ArrowArrayValidityBitmap(expected_array->children[0])->buffer.data =
-      const_cast<uint8_t*>(reinterpret_cast<uint8_t const*>(input.view().column(0).null_mask()));
-
-    auto data_ptr = reinterpret_cast<uint8_t*>(result_dev_data->data());
-    NANOARROW_THROW_NOT_OK(ArrowBufferSetAllocator(
-      ArrowArrayBuffer(expected_array->children[0], 1),
-      ArrowBufferDeallocator(
-        [](ArrowBufferAllocator* alloc, uint8_t*, int64_t) {
-          auto buf =
-            reinterpret_cast<std::unique_ptr<rmm::device_uvector<int64_t>>*>(alloc->private_data);
-          delete buf;
-        },
-        new std::unique_ptr<rmm::device_uvector<int64_t>>(std::move(result_dev_data)))));
-    ArrowArrayBuffer(expected_array->children[0], 1)->data = data_ptr;
+    populate_from_col<int64_t>(expected_array->children[0], input.view().column(0));
     NANOARROW_THROW_NOT_OK(
       ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr));
 
diff --git a/cpp/tests/interop/to_arrow_host_test.cpp b/cpp/tests/interop/to_arrow_host_test.cpp
index fa3aa82fee2..44e06f56ab6 100644
--- a/cpp/tests/interop/to_arrow_host_test.cpp
+++ b/cpp/tests/interop/to_arrow_host_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024, NVIDIA CORPORATION.
+ * Copyright (c) 2024-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -586,13 +586,13 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32Table)
     auto const col   = fp_wrapper<int32_t>({-1, 2, 3, 4, 5, 6}, scale_type{scale});
     auto const input = cudf::table_view({col});
 
-    auto const data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6};
+    auto const data = std::vector<int32_t>{-1, 2, 3, 4, 5, 6};
     nanoarrow::UniqueSchema expected_schema;
     ArrowSchemaInit(expected_schema.get());
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
     ArrowSchemaInit(expected_schema->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
+                                                     NANOARROW_TYPE_DECIMAL32,
                                                      cudf::detail::max_precision<int32_t>(),
                                                      -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
@@ -603,7 +603,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32Table)
       ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
     expected_array->length = input.num_rows();
 
-    get_nanoarrow_array<__int128_t>(data).move(expected_array->children[0]);
+    get_nanoarrow_array<int32_t>(data).move(expected_array->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr));
 
     auto got_arrow_host = cudf::to_arrow_host(input);
@@ -639,13 +639,13 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64Table)
     auto const col   = fp_wrapper<int64_t>({-1, 2, 3, 4, 5, 6}, scale_type{scale});
     auto const input = cudf::table_view({col});
 
-    auto const data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6};
+    auto const data = std::vector<int64_t>{-1, 2, 3, 4, 5, 6};
     nanoarrow::UniqueSchema expected_schema;
     ArrowSchemaInit(expected_schema.get());
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
     ArrowSchemaInit(expected_schema->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
+                                                     NANOARROW_TYPE_DECIMAL64,
                                                      cudf::detail::max_precision<int64_t>(),
                                                      -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
@@ -656,7 +656,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64Table)
       ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
     expected_array->length = input.num_rows();
 
-    get_nanoarrow_array<__int128_t>(data).move(expected_array->children[0]);
+    get_nanoarrow_array<int64_t>(data).move(expected_array->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr));
 
     auto got_arrow_host = cudf::to_arrow_host(input);
@@ -748,7 +748,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableLarge)
     auto const col   = fp_wrapper<int32_t>(iota, iota + NUM_ELEMENTS, scale_type{scale});
     auto const input = cudf::table_view({col});
 
-    auto expect_data = std::vector<__int128_t>(NUM_ELEMENTS);
+    auto expect_data = std::vector<int32_t>(NUM_ELEMENTS);
     std::iota(expect_data.begin(), expect_data.end(), 1);
 
     nanoarrow::UniqueSchema expected_schema;
@@ -756,7 +756,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableLarge)
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
     ArrowSchemaInit(expected_schema->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
+                                                     NANOARROW_TYPE_DECIMAL32,
                                                      cudf::detail::max_precision<int32_t>(),
                                                      -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
@@ -767,7 +767,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableLarge)
       ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
     expected_array->length = input.num_rows();
 
-    get_nanoarrow_array<__int128_t>(expect_data).move(expected_array->children[0]);
+    get_nanoarrow_array<int32_t>(expect_data).move(expected_array->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr));
 
     auto got_arrow_host = cudf::to_arrow_host(input);
@@ -805,7 +805,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableLarge)
     auto const col   = fp_wrapper<int64_t>(iota, iota + NUM_ELEMENTS, scale_type{scale});
     auto const input = cudf::table_view({col});
 
-    auto expect_data = std::vector<__int128_t>(NUM_ELEMENTS);
+    auto expect_data = std::vector<int64_t>(NUM_ELEMENTS);
     std::iota(expect_data.begin(), expect_data.end(), 1);
 
     nanoarrow::UniqueSchema expected_schema;
@@ -813,7 +813,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableLarge)
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
     ArrowSchemaInit(expected_schema->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
+                                                     NANOARROW_TYPE_DECIMAL64,
                                                      cudf::detail::max_precision<int64_t>(),
                                                      -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
@@ -824,7 +824,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableLarge)
       ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
     expected_array->length = input.num_rows();
 
-    get_nanoarrow_array<__int128_t>(expect_data).move(expected_array->children[0]);
+    get_nanoarrow_array<int64_t>(expect_data).move(expected_array->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr));
 
     auto got_arrow_host = cudf::to_arrow_host(input);
@@ -914,7 +914,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableNullsSimple)
   using namespace numeric;
 
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto const data     = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0};
+    auto const data     = std::vector<int32_t>{1, 2, 3, 4, 5, 6, 0, 0};
     auto const validity = std::vector<uint8_t>{1, 1, 1, 1, 1, 1, 0, 0};
     auto const col =
       fp_wrapper<int32_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
@@ -925,7 +925,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableNullsSimple)
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
     ArrowSchemaInit(expected_schema->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
+                                                     NANOARROW_TYPE_DECIMAL32,
                                                      cudf::detail::max_precision<int32_t>(),
                                                      -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
@@ -936,7 +936,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint32TableNullsSimple)
       ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
     expected_array->length = input.num_rows();
 
-    get_nanoarrow_array<__int128_t>(data, validity).move(expected_array->children[0]);
+    get_nanoarrow_array<int32_t>(data, validity).move(expected_array->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr));
 
     auto got_arrow_host = cudf::to_arrow_host(input);
@@ -969,7 +969,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableNullsSimple)
   using namespace numeric;
 
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto const data     = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0};
+    auto const data     = std::vector<int64_t>{1, 2, 3, 4, 5, 6, 0, 0};
     auto const validity = std::vector<uint8_t>{1, 1, 1, 1, 1, 1, 0, 0};
     auto const col =
       fp_wrapper<int64_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
@@ -980,7 +980,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableNullsSimple)
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1));
     ArrowSchemaInit(expected_schema->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0],
-                                                     NANOARROW_TYPE_DECIMAL128,
+                                                     NANOARROW_TYPE_DECIMAL64,
                                                      cudf::detail::max_precision<int64_t>(),
                                                      -scale));
     NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a"));
@@ -991,7 +991,7 @@ TEST_F(ToArrowHostDeviceTest, FixedPoint64TableNullsSimple)
       ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr));
     expected_array->length = input.num_rows();
 
-    get_nanoarrow_array<__int128_t>(data, validity).move(expected_array->children[0]);
+    get_nanoarrow_array<int64_t>(data, validity).move(expected_array->children[0]);
     NANOARROW_THROW_NOT_OK(ArrowArrayFinishBuildingDefault(expected_array.get(), nullptr));
 
     auto got_arrow_host = cudf::to_arrow_host(input);
diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp
index 28a80502f08..393dc098c9c 100644
--- a/cpp/tests/interop/to_arrow_test.cpp
+++ b/cpp/tests/interop/to_arrow_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -258,7 +258,7 @@ TEST_F(ToArrowTest, NestedList)
 
   auto list_arr = get_arrow_list_array<int64_t>({6, 7, 8, 9}, {0, 1, 4}, {1, 0, 1, 1});
   std::vector<int32_t> offset{0, 0, 2};
-  auto mask_buffer     = arrow::internal::BytesToBits({0, 1}).ValueOrDie();
+  auto mask_buffer     = arrow::internal::BytesToBits(std::vector<uint8_t>({0, 1})).ValueOrDie();
   auto nested_list_arr = std::make_shared<arrow::ListArray>(
     arrow::list(arrow::field("element", arrow::list(arrow::int64()), false)),
     offset.size() - 1,
@@ -337,9 +337,10 @@ TEST_F(ToArrowTest, StructColumn)
   auto fields2 = std::vector<std::shared_ptr<arrow::Field>>{
     std::make_shared<arrow::Field>("string2", str2_array->type(), str2_array->null_count() > 0),
     std::make_shared<arrow::Field>("integral2", int2_array->type(), int2_array->null_count() > 0)};
-  auto dtype2                                = std::make_shared<arrow::StructType>(fields2);
-  std::shared_ptr<arrow::Buffer> mask_buffer = arrow::internal::BytesToBits({1, 1, 0}).ValueOrDie();
-  auto struct_array2                         = std::make_shared<arrow::StructArray>(
+  auto dtype2 = std::make_shared<arrow::StructType>(fields2);
+  std::shared_ptr<arrow::Buffer> mask_buffer =
+    arrow::internal::BytesToBits(std::vector<uint8_t>({1, 1, 0})).ValueOrDie();
+  auto struct_array2 = std::make_shared<arrow::StructArray>(
     dtype2, static_cast<int64_t>(input_view.num_rows()), child_arrays2, mask_buffer);
 
   std::vector<std::shared_ptr<arrow::Array>> child_arrays(
@@ -377,9 +378,9 @@ TEST_F(ToArrowTest, FixedPoint64Table)
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
     auto const col         = fp_wrapper<int64_t>({-1, 2, 3, 4, 5, 6}, scale_type{scale});
     auto const input       = cudf::table_view({col});
-    auto const expect_data = std::vector<int64_t>{-1, -1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0};
+    auto const expect_data = std::vector<int64_t>{-1, 2, 3, 4, 5, 6};
 
-    auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale);
+    auto const arr = get_decimal_arrow_array(expect_data, std::nullopt, 18, scale);
 
     auto const field                = arrow::field("a", arr->type());
     auto const schema_vector        = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -400,7 +401,7 @@ TEST_F(ToArrowTest, FixedPoint128Table)
     auto const input       = cudf::table_view({col});
     auto const expect_data = std::vector<__int128_t>{-1, 2, 3, 4, 5, 6};
 
-    auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale);
+    auto const arr = get_decimal_arrow_array(expect_data, std::nullopt, 38, scale);
 
     auto const field                = arrow::field("a", arr->type());
     auto const schema_vector        = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -415,20 +416,15 @@ TEST_F(ToArrowTest, FixedPoint128Table)
 TEST_F(ToArrowTest, FixedPoint64TableLarge)
 {
   using namespace numeric;
-  auto constexpr BIT_WIDTH_RATIO = 2;  // Array::Type:type::DECIMAL (128) / int64_t
-  auto constexpr NUM_ELEMENTS    = 1000;
+  auto constexpr NUM_ELEMENTS = 1000;
 
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto const iota  = thrust::make_counting_iterator(1);
-    auto const col   = fp_wrapper<int64_t>(iota, iota + NUM_ELEMENTS, scale_type{scale});
-    auto const input = cudf::table_view({col});
-
-    auto const every_other = [](auto i) { return i % 2 == 0 ? i / 2 : 0; };
-    auto const transform   = cudf::detail::make_counting_transform_iterator(2, every_other);
-    auto const expect_data =
-      std::vector<int64_t>{transform, transform + NUM_ELEMENTS * BIT_WIDTH_RATIO};
+    auto const iota        = thrust::make_counting_iterator(1);
+    auto const col         = fp_wrapper<int64_t>(iota, iota + NUM_ELEMENTS, scale_type{scale});
+    auto const input       = cudf::table_view({col});
+    auto const expect_data = std::vector<int64_t>{iota, iota + NUM_ELEMENTS};
 
-    auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale);
+    auto const arr = get_decimal_arrow_array(expect_data, std::nullopt, 18, scale);
 
     auto const field                = arrow::field("a", arr->type());
     auto const schema_vector        = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -451,7 +447,7 @@ TEST_F(ToArrowTest, FixedPoint128TableLarge)
     auto const input       = cudf::table_view({col});
     auto const expect_data = std::vector<__int128_t>{iota, iota + NUM_ELEMENTS};
 
-    auto const arr = make_decimal128_arrow_array(expect_data, std::nullopt, scale);
+    auto const arr = get_decimal_arrow_array(expect_data, std::nullopt, 38, scale);
 
     auto const field                = arrow::field("a", arr->type());
     auto const schema_vector        = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -468,13 +464,13 @@ TEST_F(ToArrowTest, FixedPoint64TableNullsSimple)
   using namespace numeric;
 
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
-    auto const data     = std::vector<int64_t>{1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 0, 0, 0, 0};
-    auto const validity = std::vector<int32_t>{1, 1, 1, 1, 1, 1, 0, 0};
+    auto const data     = std::vector<int64_t>{1, 2, 3, 4, 5, 6, 0, 0};
+    auto const validity = std::vector<uint8_t>{1, 1, 1, 1, 1, 1, 0, 0};
     auto const col =
       fp_wrapper<int64_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
     auto const input = cudf::table_view({col});
 
-    auto const arr = make_decimal128_arrow_array(data, validity, scale);
+    auto const arr = get_decimal_arrow_array(data, validity, 18, scale);
 
     auto const field         = arrow::field("a", arr->type());
     auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -492,12 +488,12 @@ TEST_F(ToArrowTest, FixedPoint128TableNullsSimple)
 
   for (auto const scale : {3, 2, 1, 0, -1, -2, -3}) {
     auto const data     = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 0, 0};
-    auto const validity = std::vector<int32_t>{1, 1, 1, 1, 1, 1, 0, 0};
+    auto const validity = std::vector<uint8_t>{1, 1, 1, 1, 1, 1, 0, 0};
     auto const col =
       fp_wrapper<__int128_t>({1, 2, 3, 4, 5, 6, 0, 0}, {1, 1, 1, 1, 1, 1, 0, 0}, scale_type{scale});
     auto const input = cudf::table_view({col});
 
-    auto const arr = make_decimal128_arrow_array(data, validity, scale);
+    auto const arr = get_decimal_arrow_array(data, validity, 38, scale);
 
     auto const field         = arrow::field("a", arr->type());
     auto const schema_vector = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -518,11 +514,10 @@ TEST_F(ToArrowTest, FixedPoint64TableNulls)
       {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}, scale_type{scale});
     auto const input = cudf::table_view({col});
 
-    auto const expect_data =
-      std::vector<int64_t>{1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0, 9, 0, 10, 0};
-    auto const validity = std::vector<int32_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
+    auto const expect_data = std::vector<int64_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+    auto const validity    = std::vector<uint8_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
 
-    auto arr = make_decimal128_arrow_array(expect_data, validity, scale);
+    auto const arr = get_decimal_arrow_array(expect_data, validity, 18, scale);
 
     auto const field                = arrow::field("a", arr->type());
     auto const schema_vector        = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -544,9 +539,9 @@ TEST_F(ToArrowTest, FixedPoint128TableNulls)
     auto const input = cudf::table_view({col});
 
     auto const expect_data = std::vector<__int128_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
-    auto const validity    = std::vector<int32_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
+    auto const validity    = std::vector<uint8_t>{1, 0, 1, 0, 1, 0, 1, 0, 1, 0};
 
-    auto arr = make_decimal128_arrow_array(expect_data, validity, scale);
+    auto const arr = get_decimal_arrow_array(expect_data, validity, 38, scale);
 
     auto const field                = arrow::field("a", arr->type());
     auto const schema_vector        = std::vector<std::shared_ptr<arrow::Field>>({field});
@@ -611,7 +606,9 @@ auto col_to_arrow_type(cudf::column_view const& col)
     case cudf::type_id::STRING: return arrow::utf8();
     case cudf::type_id::LIST:
       return arrow::list(col_to_arrow_type(col.child(cudf::lists_column_view::child_column_index)));
-    case cudf::type_id::DECIMAL128: return arrow::decimal(38, -col.type().scale());
+    case cudf::type_id::DECIMAL32: return arrow::decimal32(9, -col.type().scale());
+    case cudf::type_id::DECIMAL64: return arrow::decimal64(18, -col.type().scale());
+    case cudf::type_id::DECIMAL128: return arrow::decimal128(38, -col.type().scale());
     default: CUDF_FAIL("Unsupported type_id conversion to arrow type", cudf::data_type_error);
   }
 }
@@ -652,27 +649,36 @@ TYPED_TEST(ToArrowNumericScalarTest, Basic)
 
 struct ToArrowDecimalScalarTest : public cudf::test::BaseFixture {};
 
-// Only testing Decimal128 because that's the only size cudf and arrow have in common.
-TEST_F(ToArrowDecimalScalarTest, Basic)
+template <typename DecimalType>
+void check_decimal_scalar(int const value, arrow::Scalar const& ref_scalar, int32_t const scale)
 {
-  auto const value{42};
-  auto const precision =
-    cudf::detail::max_precision<__int128_t>();  // cudf will convert to the widest-precision Arrow
-                                                // scalar of the type
-  int32_t const scale{4};
-
   auto const cudf_scalar =
-    cudf::make_fixed_point_scalar<numeric::decimal128>(value, numeric::scale_type{scale});
+    cudf::make_fixed_point_scalar<DecimalType>(value, numeric::scale_type{scale});
 
   auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar);
   ASSERT_TRUE(maybe_scalar.has_value());
   auto const arrow_scalar = *maybe_scalar;
+  EXPECT_TRUE(arrow_scalar->Equals(ref_scalar));
+}
 
-  auto const maybe_ref_arrow_scalar =
-    arrow::MakeScalar(arrow::decimal128(precision, -scale), value);
-  if (!maybe_ref_arrow_scalar.ok()) { CUDF_FAIL("Failed to construct reference scalar"); }
-  auto const ref_arrow_scalar = *maybe_ref_arrow_scalar;
-  EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar));
+TEST_F(ToArrowDecimalScalarTest, Basic)
+{
+  auto const value{42};
+  int32_t const scale{4};
+
+  auto const get_ref_scalar = [&](std::shared_ptr<arrow::DataType> type) {
+    auto const maybe_ref_scalar = arrow::MakeScalar(type, value);
+    if (!maybe_ref_scalar.ok()) { CUDF_FAIL("Failed to construct reference scalar"); }
+    return *maybe_ref_scalar;
+  };
+
+  auto const decimal32_scalar  = get_ref_scalar(arrow::decimal32(9, -scale));
+  auto const decimal64_scalar  = get_ref_scalar(arrow::decimal64(18, -scale));
+  auto const decimal128_scalar = get_ref_scalar(arrow::decimal128(38, -scale));
+
+  check_decimal_scalar<numeric::decimal32>(value, *decimal32_scalar, scale);
+  check_decimal_scalar<numeric::decimal64>(value, *decimal64_scalar, scale);
+  check_decimal_scalar<numeric::decimal128>(value, *decimal128_scalar, scale);
 }
 
 struct ToArrowStringScalarTest : public cudf::test::BaseFixture {};
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index 7eb32892bad..642eb7f037c 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -40,6 +40,7 @@
 import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.OriginalType;
+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
 import java.io.*;
@@ -9462,6 +9463,9 @@ private Table castDecimal64To128(Table t) {
   }
 
   @Test
+  @Disabled("arrow-java does not yet support Decimal32/Decimal64, so now that " +
+    "we don't automatically upcast to decimal128 on conversion to arrow, we have " +
+    "to wait until it supports those types, then upgrade")
   void testArrowIPCWriteToFileWithNamesAndMetadata() throws IOException {
     File tempFile = File.createTempFile("test-names-metadata", ".arrow");
     String[] columnNames = WriteUtils.getNonNestedColumns(false);
@@ -9495,6 +9499,8 @@ void testArrowIPCWriteToFileWithNamesAndMetadata() throws IOException {
   }
 
   @Test
+  @Disabled("arrow-java does not yet support Decimal32/Decimal64, " +
+    "this can be re-enabled once it does and we upgrade")
   void testArrowIPCWriteToBufferChunked() {
     String[] nonNestedCols = WriteUtils.getNonNestedColumns(false);
     List<String> columns = Lists.newArrayList(nonNestedCols);
diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py
index e0bc8f32c9b..80ffce9e8be 100644
--- a/python/cudf/cudf/tests/test_reductions.py
+++ b/python/cudf/cudf/tests/test_reductions.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 
 
 from decimal import Decimal
@@ -6,6 +6,7 @@
 
 import numpy as np
 import pandas as pd
+import pyarrow as pa
 import pytest
 
 import cudf
@@ -53,10 +54,34 @@ def test_sum_string():
 @pytest.mark.parametrize(
     "dtype",
     [
-        Decimal64Dtype(6, 3),
-        Decimal64Dtype(10, 6),
-        Decimal64Dtype(16, 7),
-        Decimal32Dtype(6, 3),
+        pytest.param(
+            Decimal64Dtype(6, 3),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(10, 6),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(16, 7),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal32Dtype(6, 3),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal32 format string only supported in pyarrow >=19",
+            ),
+        ),
         Decimal128Dtype(20, 7),
     ],
 )
@@ -93,10 +118,34 @@ def test_product(dtype, nelem):
 @pytest.mark.parametrize(
     "dtype",
     [
-        Decimal64Dtype(6, 2),
-        Decimal64Dtype(8, 4),
-        Decimal64Dtype(10, 5),
-        Decimal32Dtype(6, 2),
+        pytest.param(
+            Decimal64Dtype(6, 2),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(8, 4),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(10, 5),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal32Dtype(6, 2),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal32 format string only supported in pyarrow >=19",
+            ),
+        ),
         Decimal128Dtype(20, 5),
     ],
 )
@@ -141,11 +190,35 @@ def test_sum_of_squares(dtype, nelem):
 @pytest.mark.parametrize(
     "dtype",
     [
-        Decimal64Dtype(6, 2),
-        Decimal64Dtype(8, 4),
-        Decimal64Dtype(10, 5),
+        pytest.param(
+            Decimal64Dtype(6, 2),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(8, 4),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(10, 5),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
         Decimal128Dtype(20, 7),
-        Decimal32Dtype(6, 2),
+        pytest.param(
+            Decimal32Dtype(6, 2),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal32 format string only supported in pyarrow >=19",
+            ),
+        ),
     ],
 )
 def test_sum_of_squares_decimal(dtype):
@@ -172,10 +245,34 @@ def test_min(dtype, nelem):
 @pytest.mark.parametrize(
     "dtype",
     [
-        Decimal64Dtype(6, 3),
-        Decimal64Dtype(10, 6),
-        Decimal64Dtype(16, 7),
-        Decimal32Dtype(6, 3),
+        pytest.param(
+            Decimal64Dtype(6, 3),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(10, 6),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(16, 7),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal32Dtype(6, 3),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal32 format string only supported in pyarrow >=19",
+            ),
+        ),
         Decimal128Dtype(20, 7),
     ],
 )
@@ -204,10 +301,34 @@ def test_max(dtype, nelem):
 @pytest.mark.parametrize(
     "dtype",
     [
-        Decimal64Dtype(6, 3),
-        Decimal64Dtype(10, 6),
-        Decimal64Dtype(16, 7),
-        Decimal32Dtype(6, 3),
+        pytest.param(
+            Decimal64Dtype(6, 3),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(10, 6),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal64Dtype(16, 7),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow >=19",
+            ),
+        ),
+        pytest.param(
+            Decimal32Dtype(6, 3),
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal32 format string only supported in pyarrow >=19",
+            ),
+        ),
         Decimal128Dtype(20, 7),
     ],
 )
diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py
index 1e120cfb293..13a0d7b3ba1 100644
--- a/python/cudf/cudf/tests/test_scalar.py
+++ b/python/cudf/cudf/tests/test_scalar.py
@@ -163,7 +163,23 @@ def test_scalar_device_initialization(value):
 @pytest.mark.parametrize("value", DECIMAL_VALUES)
 @pytest.mark.parametrize(
     "decimal_type",
-    [cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype],
+    [
+        pytest.param(
+            cudf.Decimal32Dtype,
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal32 format string only supported in pyarrow>=19",
+            ),
+        ),
+        pytest.param(
+            cudf.Decimal64Dtype,
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow>=19",
+            ),
+        ),
+        cudf.Decimal128Dtype,
+    ],
 )
 def test_scalar_device_initialization_decimal(value, decimal_type):
     dtype = decimal_type._from_decimal(value)
@@ -381,6 +397,50 @@ def test_scalar_invalid_implicit_conversion(cls, dtype):
             cls(slr)
 
 
+@pytest.mark.parametrize("value", SCALAR_VALUES + DECIMAL_VALUES)
+@pytest.mark.parametrize(
+    "decimal_type",
+    [
+        pytest.param(
+            cudf.Decimal32Dtype,
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal32 format string only supported in pyarrow>=19",
+            ),
+        ),
+        pytest.param(
+            cudf.Decimal64Dtype,
+            marks=pytest.mark.skipif(
+                pa._generated_version.version_tuple[0] < 19,
+                reason="decimal64 format string only supported in pyarrow>=19",
+            ),
+        ),
+        cudf.Decimal128Dtype,
+    ],
+)
+def test_device_scalar_direct_construction(value, decimal_type):
+    """Construct a cudf.Scalar with an explicit dtype and verify it."""
+    value = cudf.utils.dtypes.to_cudf_compatible_scalar(value)
+    if isinstance(value, Decimal):
+        dtype = decimal_type._from_decimal(value)
+    else:
+        dtype = value.dtype
+    slr = cudf.Scalar(value, dtype)
+
+    # NaN never equals itself, so accept the case where both sides are NaN.
+    assert slr.value == value or (np.isnan(slr.value) and np.isnan(value))
+
+    if isinstance(
+        dtype, (cudf.Decimal32Dtype, cudf.Decimal64Dtype, cudf.Decimal128Dtype)
+    ):
+        assert slr.dtype.precision == dtype.precision
+        assert slr.dtype.scale == dtype.scale
+    elif dtype.char == "U":
+        assert slr.dtype == "object"
+    else:
+        assert slr.dtype == dtype
+
+
 @pytest.mark.parametrize("value", SCALAR_VALUES + DECIMAL_VALUES)
 def test_construct_from_scalar(value):
     value = cudf.utils.dtypes.to_cudf_compatible_scalar(value)