|
/*
 * Copyright (c) 2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | + |
| 17 | +#include <benchmarks/common/generate_input.hpp> |
| 18 | + |
| 19 | +#include <cudf/types.hpp> |
| 20 | +#include <cudf/unary.hpp> |
| 21 | + |
| 22 | +#include <nvbench/nvbench.cuh> |
| 23 | + |
| 24 | +#include <type_traits> |
| 25 | + |
// This benchmark compares the cost of converting decimal <--> floating point
//
// Axes:
//   num_rows  - column length (power of two)
//   exp_range - selects one of seven 10^6-wide exponent windows probing the
//               edges of the float/double ranges as well as common magnitudes
template <typename InputType, typename OutputType>
void bench_cast_decimal(nvbench::state& state, nvbench::type_list<InputType, OutputType>)
{
  // Compile-time classification of the requested conversion.
  static constexpr bool input_is_float  = std::is_floating_point_v<InputType>;
  static constexpr bool output_is_float = std::is_floating_point_v<OutputType>;

  static constexpr bool uses_double =
    std::is_same_v<InputType, double> || std::is_same_v<OutputType, double>;
  static constexpr bool uses_dec32 =
    std::is_same_v<InputType, numeric::decimal32> || std::is_same_v<OutputType, numeric::decimal32>;
  static constexpr bool uses_dec128 = std::is_same_v<InputType, numeric::decimal128> ||
                                      std::is_same_v<OutputType, numeric::decimal128>;

  // Skip floating --> floating and decimal --> decimal
  if constexpr (input_is_float == output_is_float) {
    state.skip("Meaningless conversion.");
    return;
  }

  // Skip float <--> dec128
  if constexpr (!uses_double && uses_dec128) {
    state.skip("Ignoring float <--> dec128.");
    return;
  }

  // Benchmark axis values
  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const exp_mode = state.get_int64("exp_range");

  // Exponent range: Range size is 10^6
  // These probe the edges of the float and double ranges, as well as more common values
  int const exponent_minima[] = {-307, -37, -14, -3, 8, 31, 301};
  int const exp_range_size    = 6;
  int const exp_min           = exponent_minima[exp_mode];
  int const exp_max           = exp_min + exp_range_size;

  // With exp range size of 6, decimal output (generated or casted-to) has 7 digits of precision
  int const extra_digits_precision = 1;

  // Exclude end range of double from float test
  if (!uses_double && ((exp_mode == 0) || (exp_mode == 6))) {
    state.skip("Range beyond end of float tests.");
    return;
  }

  // The current float <--> decimal conversion algorithm is limited
  static constexpr bool uses_dec64 = !uses_dec32 && !uses_dec128;
  if (uses_dec32 && (exp_mode != 3)) {
    state.skip("Decimal32 conversion only works up to scale factors of 10^9.");
    return;
  }
  if (uses_dec64 && ((exp_mode < 2) || (exp_mode > 4))) {
    state.skip("Decimal64 conversion only works up to scale factors of 10^18.");
    return;
  }
  if (uses_dec128 && ((exp_mode == 0) || (exp_mode == 6))) {
    state.skip("Decimal128 conversion only works up to scale factors of 10^38.");
    return;
  }

  // cudf type ids for the source and destination columns
  auto const input_id  = cudf::type_to_id<InputType>();
  auto const output_id = cudf::type_to_id<OutputType>();

  // Build the random-data profile, and (for floating input) the decimal output scale.
  auto const [output_scale, profile] = [&]() {
    if constexpr (input_is_float) {
      // Bounds for the generated floating-point values
      auto pow10_of = [](auto exp10) {
        return std::pow(static_cast<InputType>(10), static_cast<InputType>(exp10));
      };
      InputType const range_lo = pow10_of(exp_min);
      InputType const range_hi = pow10_of(exp_max);

      // With exp range size of 6, output has 7 decimal digits of precision
      auto const decimal_output_scale = exp_min - extra_digits_precision;

      // Distribution for the generated floating-point input
      data_profile const profile = data_profile_builder().distribution(
        input_id, distribution_id::NORMAL, range_lo, range_hi);

      return std::pair{decimal_output_scale, profile};

    } else {  // Generating decimals

      using decimal_rep_type = typename InputType::rep;

      // For exp range size 6 and precision 7, generates ints between 10 and 10^7,
      // with scale factor of: exp_max - 7. This matches floating point generation.
      int const digits_precision      = exp_range_size + extra_digits_precision;
      auto const decimal_input_scale  = numeric::scale_type{exp_max - digits_precision};

      // Bounds for the generated integer representation values
      auto pow10_of = [](auto exp10) {
        return numeric::detail::ipow<decimal_rep_type, numeric::Radix::BASE_10>(exp10);
      };
      auto const range_lo = pow10_of(digits_precision - exp_range_size);
      auto const range_hi = pow10_of(digits_precision);

      // Distribution for the generated decimal input
      data_profile const profile = data_profile_builder().distribution(
        input_id, distribution_id::NORMAL, range_lo, range_hi, decimal_input_scale);

      // Scale is unused when casting decimal --> floating
      return std::pair{0, profile};
    }
  }();

  // Generate the input column
  auto const input_col  = create_random_column(input_id, row_count{num_rows}, profile);
  auto const input_view = input_col->view();

  // Decimal destinations carry an explicit scale; floating-point ones do not.
  auto const output_type =
    input_is_float ? cudf::data_type(output_id, output_scale) : cudf::data_type(output_id);

  // Hand cudf's default stream to nvbench so timing happens on the right stream
  auto stream = cudf::get_default_stream();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));

  // Time the cast itself
  state.exec(nvbench::exec_tag::sync,
             [&](nvbench::launch&) { cudf::cast(input_view, output_type); });

  // Throughput statistics
  state.add_element_count(num_rows);
  state.add_global_memory_reads<InputType>(num_rows);
  state.add_global_memory_writes<OutputType>(num_rows);
}
| 158 | + |
// Full cartesian product of supported types; invalid pairings are skipped
// inside bench_cast_decimal itself.
using conversion_types =
  nvbench::type_list<float, double, numeric::decimal32, numeric::decimal64, numeric::decimal128>;

NVBENCH_BENCH_TYPES(bench_cast_decimal, NVBENCH_TYPE_AXES(conversion_types, conversion_types))
  .set_name("decimal_floating_conversion")
  .set_type_axes_names({"InputType", "OutputType"})
  .add_int64_power_of_two_axis("num_rows", {28})
  .add_int64_axis("exp_range", nvbench::range(0, 6));