Skip to content

Commit feb96cb

Browse files
Benchmark decimal <--> floating conversions. (#15334)
Adds benchmarks for decimal <--> floating conversions. Does so for float <--> decimal32 & decimal64, and for double <--> decimal32, decimal64, and decimal128. Within a column, data tends to fall in a similar range of values, so this provides separate tests for different representative ranges of powers of 10. Note that with the current conversion algorithm, the maximum supported scale of a decimal type is the maximum power of 10 that the type can hold: scale 9 for decimal32, 19 for decimal64, and 38 for decimal128. Thus only these ranges of floats/doubles are tested. Also adds the ability to generate decimals with a specific (rather than random) scale factor. This expands the API rather than replacing the existing one: all existing tests that generate a column of random decimals will continue to do so with a random scale factor; this capability is opt-in. The machinery for this was already there, but only partially; this change fills it in. Authors: - Paul Mattione (https://github.com/pmattione-nvidia) Approvers: - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) URL: #15334
1 parent 77abf03 commit feb96cb

File tree

4 files changed

+211
-8
lines changed

4 files changed

+211
-8
lines changed

cpp/benchmarks/CMakeLists.txt

+5
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,11 @@ ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp)
339339
ConfigureNVBench(MULTIBYTE_SPLIT_NVBENCH io/text/multibyte_split.cpp)
340340
target_link_libraries(MULTIBYTE_SPLIT_NVBENCH PRIVATE ZLIB::ZLIB)
341341

342+
# ##################################################################################################
343+
# * decimal benchmark
344+
# ---------------------------------------------------------------------------------
345+
ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp)
346+
342347
add_custom_target(
343348
run_benchmarks
344349
DEPENDS CUDF_BENCHMARKS

cpp/benchmarks/common/generate_input.cu

+3-2
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,11 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
324324
distribution_fn<DeviceType> dist;
325325
std::optional<numeric::scale_type> scale;
326326

327-
// Constructs the generator from fixed-point distribution parameters, caching the
// optional scale so generated columns can honor a caller-specified scale factor
// (if unset, a random scale is chosen at generation time — see commit description).
random_value_fn(distribution_params<T> const& desc)
  : lower_bound{desc.lower_bound},
    upper_bound{desc.upper_bound},
    // NOTE: dist is built from the already-initialized lower_bound/upper_bound
    // members; this relies on those members being declared before dist.
    dist{make_distribution<DeviceType>(desc.id, lower_bound, upper_bound)},
    scale{desc.scale}
{
}
333334

cpp/benchmarks/common/generate_input.hpp

+36-6
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,17 @@ struct distribution_params<T, std::enable_if_t<std::is_same_v<T, cudf::struct_vi
182182
cudf::size_type max_depth;
183183
};
184184

185-
// Present for compilation only. To be implemented once reader/writers support the fixed width type.
185+
/**
 * @brief Fixed-point values are parameterized with a distribution type, scale, and bounds of the
 * same type.
 */
template <typename T>
struct distribution_params<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
  distribution_id id;                        // shape of the random distribution
  typename T::rep lower_bound;               // lower bound on the generated unscaled values
  typename T::rep upper_bound;               // upper bound on the generated unscaled values
  std::optional<numeric::scale_type> scale;  // fixed scale factor; empty => random scale per column
};
188196

189197
/**
190198
* @brief Returns a vector of types, corresponding to the input type or a type group.
@@ -226,7 +234,7 @@ class data_profile {
226234
cudf::type_id::INT32, {distribution_id::GEOMETRIC, 0, 64}, 2};
227235
distribution_params<cudf::struct_view> struct_dist_desc{
228236
{cudf::type_id::INT32, cudf::type_id::FLOAT32, cudf::type_id::STRING}, 2};
229-
std::map<cudf::type_id, distribution_params<__uint128_t>> decimal_params;
237+
std::map<cudf::type_id, distribution_params<numeric::decimal128>> decimal_params;
230238

231239
double bool_probability_true = 0.5;
232240
std::optional<double> null_probability = 0.01;
@@ -300,16 +308,21 @@ class data_profile {
300308
}
301309

302310
// Returns the distribution parameters for fixed-point type T. Falls back to the
// type's default distribution — with no fixed scale — when the user has not
// registered parameters for this decimal type.
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
distribution_params<T> get_distribution_params() const
{
  using rep = typename T::rep;
  auto it   = decimal_params.find(cudf::type_to_id<T>());
  if (it == decimal_params.end()) {
    auto const range = default_range<rep>();
    // No user-provided parameters: leave the scale unset so a random scale
    // factor is chosen when the column is generated.
    auto const scale = std::optional<numeric::scale_type>{};
    return distribution_params<T>{
      default_distribution_id<rep>(), range.first, range.second, scale};
  } else {
    auto& desc = it->second;
    // Stored bounds use the widest rep (decimal128's __int128_t); narrow them
    // to this type's rep.
    return {desc.id,
            static_cast<rep>(desc.lower_bound),
            static_cast<rep>(desc.upper_bound),
            desc.scale};
  }
}
315328

@@ -359,6 +372,23 @@ class data_profile {
359372
}
360373
}
361374

375+
// Sets the fixed-point distribution parameters — including a fixed scale factor —
// for every type in `type_or_group`. Users should pass integral values for the
// bounds; otherwise the call will have no effect (SFINAE removes this overload
// for non-integral T).
template <typename T,
          typename Type_enum,
          std::enable_if_t<cuda::std::is_integral_v<T>, T>* = nullptr>
void set_distribution_params(Type_enum type_or_group,
                             distribution_id dist,
                             T lower_bound,
                             T upper_bound,
                             numeric::scale_type scale)
{
  // Bounds are widened to __int128_t so a single map entry type can hold
  // parameters for all decimal widths (decimal32/64/128).
  for (auto tid : get_type_or_group(static_cast<int32_t>(type_or_group))) {
    decimal_params[tid] = {
      dist, static_cast<__int128_t>(lower_bound), static_cast<__int128_t>(upper_bound), scale};
  }
}
391+
362392
template <typename T, typename Type_enum, std::enable_if_t<cudf::is_chrono<T>(), T>* = nullptr>
363393
void set_distribution_params(Type_enum type_or_group,
364394
distribution_id dist,
+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
/*
2+
* Copyright (c) 2024, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <benchmarks/common/generate_input.hpp>
18+
19+
#include <cudf/types.hpp>
20+
#include <cudf/unary.hpp>
21+
22+
#include <nvbench/nvbench.cuh>
23+
24+
#include <type_traits>
25+
26+
/**
 * @brief Benchmarks the cost of converting decimal <--> floating point via cudf::cast.
 *
 * One type axis is the input, the other the output; pairings that are not a
 * floating <--> decimal conversion (or that exceed the conversion algorithm's
 * supported scale range) are skipped at runtime.
 */
template <typename InputType, typename OutputType>
void bench_cast_decimal(nvbench::state& state, nvbench::type_list<InputType, OutputType>)
{
  static constexpr bool is_input_floating  = std::is_floating_point_v<InputType>;
  static constexpr bool is_output_floating = std::is_floating_point_v<OutputType>;

  static constexpr bool is_double =
    std::is_same_v<InputType, double> || std::is_same_v<OutputType, double>;
  static constexpr bool is_32bit =
    std::is_same_v<InputType, numeric::decimal32> || std::is_same_v<OutputType, numeric::decimal32>;
  static constexpr bool is_128bit = std::is_same_v<InputType, numeric::decimal128> ||
                                    std::is_same_v<OutputType, numeric::decimal128>;

  // Skip floating --> floating and decimal --> decimal
  if constexpr (is_input_floating == is_output_floating) {
    state.skip("Meaningless conversion.");
    return;
  }

  // Skip float <--> dec128
  if constexpr (!is_double && is_128bit) {
    state.skip("Ignoring float <--> dec128.");
    return;
  }

  // Get settings
  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const exp_mode = state.get_int64("exp_range");

  // Exponent range: Range size is 10^6
  // These probe the edges of the float and double ranges, as well as more common values
  int const exp_min_array[] = {-307, -37, -14, -3, 8, 31, 301};
  int const exp_range_size  = 6;
  int const exp_min         = exp_min_array[exp_mode];
  int const exp_max         = exp_min + exp_range_size;

  // With exp range size of 6, decimal output (generated or casted-to) has 7 digits of precision
  int const extra_digits_precision = 1;

  // Exclude end range of double from float test (exp_modes 0 and 6 lie beyond float's exponent range)
  if (!is_double && ((exp_mode == 0) || (exp_mode == 6))) {
    state.skip("Range beyond end of float tests.");
    return;
  }

  // The current float <--> decimal conversion algorithm is limited: each decimal
  // width only supports scale factors up to the largest power of 10 its rep holds.
  static constexpr bool is_64bit = !is_32bit && !is_128bit;
  if (is_32bit && (exp_mode != 3)) {
    state.skip("Decimal32 conversion only works up to scale factors of 10^9.");
    return;
  }
  if (is_64bit && ((exp_mode < 2) || (exp_mode > 4))) {
    state.skip("Decimal64 conversion only works up to scale factors of 10^18.");
    return;
  }
  if (is_128bit && ((exp_mode == 0) || (exp_mode == 6))) {
    state.skip("Decimal128 conversion only works up to scale factors of 10^38.");
    return;
  }

  // Type IDs
  auto const input_id  = cudf::type_to_id<InputType>();
  auto const output_id = cudf::type_to_id<OutputType>();

  // Create data profile and scale. The IIFE lets both the floating and decimal
  // input cases produce a (scale, profile) pair from constexpr-divergent code.
  auto const [output_scale, profile] = [&]() {
    if constexpr (is_input_floating) {
      // Range for generated floating point values
      auto get_pow10 = [](auto exp10) {
        return std::pow(static_cast<InputType>(10), static_cast<InputType>(exp10));
      };
      InputType const floating_range_min = get_pow10(exp_min);
      InputType const floating_range_max = get_pow10(exp_max);

      // With exp range size of 6, output has 7 decimal digits of precision
      auto const decimal_output_scale = exp_min - extra_digits_precision;

      // Input distribution
      data_profile const profile = data_profile_builder().distribution(
        input_id, distribution_id::NORMAL, floating_range_min, floating_range_max);

      return std::pair{decimal_output_scale, profile};

    } else {  // Generating decimals

      using decimal_rep_type = typename InputType::rep;

      // For exp range size 6 and precision 7, generates ints between 10 and 10^7,
      // with scale factor of: exp_max - 7. This matches floating point generation.
      int const digits_precision      = exp_range_size + extra_digits_precision;
      auto const decimal_input_scale  = numeric::scale_type{exp_max - digits_precision};

      // Range for generated integer values (exact integer powers of 10)
      auto get_pow10 = [](auto exp10) {
        return numeric::detail::ipow<decimal_rep_type, numeric::Radix::BASE_10>(exp10);
      };
      auto const decimal_range_min = get_pow10(digits_precision - exp_range_size);
      auto const decimal_range_max = get_pow10(digits_precision);

      // Input distribution (with a caller-specified, fixed scale factor)
      data_profile const profile = data_profile_builder().distribution(input_id,
                                                                       distribution_id::NORMAL,
                                                                       decimal_range_min,
                                                                       decimal_range_max,
                                                                       decimal_input_scale);

      // Output is floating point in this branch, so output_scale is unused (0).
      return std::pair{0, profile};
    }
  }();

  // Generate input data
  auto const input_col  = create_random_column(input_id, row_count{num_rows}, profile);
  auto const input_view = input_col->view();

  // Output type: only a decimal output carries a scale
  auto const output_type =
    !is_input_floating ? cudf::data_type(output_id) : cudf::data_type(output_id, output_scale);

  // Stream
  auto stream = cudf::get_default_stream();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

  // Run benchmark
  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch&) { cudf::cast(input_view, output_type); });

  // Throughput statistics
  state.add_element_count(num_rows);
  state.add_global_memory_reads<InputType>(num_rows);
  state.add_global_memory_writes<OutputType>(num_rows);
}
158+
159+
// Data types: the full cross product of these is registered; pairings that are
// not a floating <--> decimal conversion are skipped inside the benchmark.
using data_types =
  nvbench::type_list<float, double, numeric::decimal32, numeric::decimal64, numeric::decimal128>;

NVBENCH_BENCH_TYPES(bench_cast_decimal, NVBENCH_TYPE_AXES(data_types, data_types))
  .set_name("decimal_floating_conversion")
  .set_type_axes_names({"InputType", "OutputType"})
  .add_int64_power_of_two_axis("num_rows", {28})  // 2^28 rows
  .add_int64_axis("exp_range", nvbench::range(0, 6));  // indexes exp_min_array in the benchmark

0 commit comments

Comments
 (0)