From 36b724de043301042d43d0d6bb4194c77b141605 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Tue, 25 Apr 2023 07:36:26 +0800 Subject: [PATCH 1/5] Overhaul floating-point `from_chars` - handle non-zero tail digits in the integral part - change the calculation of the exponent part --- stl/inc/charconv | 71 ++++++++++++++----- .../double_from_chars_test_cases.hpp | 10 +++ tests/std/tests/P0067R5_charconv/test.cpp | 8 +++ 3 files changed, 73 insertions(+), 16 deletions(-) diff --git a/stl/inc/charconv b/stl/inc/charconv index f2326f7e15..6a231858a8 100644 --- a/stl/inc/charconv +++ b/stl/inc/charconv @@ -1579,6 +1579,8 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi } const char* const _Leading_zero_end = _Next; + bool _Has_zero_tail = true; + // Scan the integer part of the mantissa: for (; _Next != _Last; ++_Next) { const unsigned char _Digit_value = _Digit_from_char(*_Next); @@ -1589,20 +1591,18 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi if (_Mantissa_it != _Mantissa_last) { *_Mantissa_it++ = _Digit_value; + } else { + _Has_zero_tail = _Has_zero_tail && _Digit_value == 0; } } const char* const _Whole_end = _Next; - // Defend against _Exponent_adjustment integer overflow. (These values don't need to be strict.) - constexpr ptrdiff_t _Maximum_adjustment = 1'000'000; - constexpr ptrdiff_t _Minimum_adjustment = -1'000'000; - // The exponent adjustment holds the number of digits in the mantissa buffer that appeared before the radix point. // It can be negative, and leading zeroes in the integer part are ignored. Examples: // For "03333.111", it is 4. // For "00000.111", it is 0. // For "00000.001", it is -2. - int _Exponent_adjustment = static_cast((_STD min)(_Whole_end - _Leading_zero_end, _Maximum_adjustment)); + ptrdiff_t _Exponent_adjustment = _Whole_end - _Leading_zero_end; // [_Whole_end, _Dot_end) will contain 0 or 1 '.' characters if (_Next != _Last && *_Next == '.') { @@ -1618,12 +1618,10 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi for (; _Next != _Last && *_Next == '0'; ++_Next) { } - _Exponent_adjustment = static_cast((_STD max)(_Dot_end - _Next, _Minimum_adjustment)); + _Exponent_adjustment = _Dot_end - _Next; } // Scan the fractional part of the mantissa: - bool _Has_zero_tail = true; - for (; _Next != _Last; ++_Next) { const unsigned char _Digit_value = _Digit_from_char(*_Next); @@ -1647,7 +1645,8 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi const char _Exponent_prefix{_Is_hexadecimal ? 'p' : 'e'}; bool _Exponent_is_negative = false; - int _Exponent = 0; + bool _Exp_abs_too_large = false; + ptrdiff_t _Exponent = 0; constexpr int _Maximum_temporary_decimal_exponent = 5200; constexpr int _Minimum_temporary_decimal_exponent = -5200; @@ -1672,8 +1671,10 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi // found decimal digit - if (_Exponent <= _Maximum_temporary_decimal_exponent) { + if (_Exponent < PTRDIFF_MAX / 10 || (_Exponent == PTRDIFF_MAX / 10 && _Digit_value <= PTRDIFF_MAX % 10)) { _Exponent = _Exponent * 10 + _Digit_value; + } else { + _Exp_abs_too_large = true; } ++_Unread; @@ -1710,14 +1711,52 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi return {_Next, errc{}}; } - // Before we adjust the exponent, handle the case where we detected a wildly - // out of range exponent during parsing and clamped the value: - if (_Exponent > _Maximum_temporary_decimal_exponent) { + // Handle exponent of an overly large absolute value. + if (_Exp_abs_too_large) { + if (_Exponent > 0) { + _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value); + return {_Next, errc::result_out_of_range}; + } else { + _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value); + return {_Next, errc::result_out_of_range}; + } + } + + // Adjust _Exponent and _Exponent_adjustment when they have different signedness to avoid overflow. + if (_Exponent > 0 && _Exponent_adjustment < 0) { + if (_Is_hexadecimal) { + const ptrdiff_t _Further_adjustment = (_STD max)(-((_Exponent - 1) / 4 + 1), _Exponent_adjustment); + _Exponent += _Further_adjustment * 4; + _Exponent_adjustment -= _Further_adjustment; + } else { + const ptrdiff_t _Further_adjustment = (_STD max)(-_Exponent, _Exponent_adjustment); + _Exponent += _Further_adjustment; + _Exponent_adjustment -= _Further_adjustment; + + } + } else if (_Exponent < 0 && _Exponent_adjustment > 0) { + if (_Is_hexadecimal) { + const ptrdiff_t _Further_adjustment = (_STD min)((-_Exponent - 1) / 4 + 1, _Exponent_adjustment); + _Exponent += _Further_adjustment * 4; + _Exponent_adjustment -= _Further_adjustment; + } else { + const ptrdiff_t _Further_adjustment = (_STD min)(-_Exponent, _Exponent_adjustment); + _Exponent += _Further_adjustment; + _Exponent_adjustment -= _Further_adjustment; + } + } + + // And then _Exponent and _Exponent_adjustment are either both non-negative or both non-positive. + // So we can detect out-of-range cases directly. + if (_Exponent > _Maximum_temporary_decimal_exponent + || _Exponent_adjustment + > (_Is_hexadecimal ? _Maximum_temporary_decimal_exponent / 4 : _Maximum_temporary_decimal_exponent)) { _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value); return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999" } - - if (_Exponent < _Minimum_temporary_decimal_exponent) { + if (_Exponent < _Minimum_temporary_decimal_exponent + || _Exponent_adjustment + < (_Is_hexadecimal ? _Minimum_temporary_decimal_exponent / 4 : _Minimum_temporary_decimal_exponent)) { _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value); return {_Next, errc::result_out_of_range}; // Underflow example: "1e-9999" } @@ -1741,7 +1780,7 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi return {_Next, errc::result_out_of_range}; // Underflow example: "0.001e-5199" } - _Fp_string._Myexponent = _Exponent; + _Fp_string._Myexponent = static_cast(_Exponent); _Fp_string._Mymantissa_count = static_cast(_Mantissa_it - _Mantissa_first); if (_Is_hexadecimal) { diff --git a/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp b/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp index 40c4778f6e..8c35af82e3 100644 --- a/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp +++ b/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp @@ -1526,6 +1526,16 @@ inline constexpr DoubleFromCharsTestCase double_from_chars_test_cases[] = { "315290680984617210924625396728515625e-308", chars_format::scientific, 721, errc{}, 0x1.0000000000000p-1022}, + // GH-3161: slightly above the midpoint between 0x1.4258265642fe8p-1022 and 0x1.4258265642fe9p-1022 + {"28017185671564702625986967801367508381305145856029502167789836829722124560807078866183829589255835985468748869481" + "86583142362517164634520243360501584015713807233124871337198558567758800235976556111488605578858880932554217965863" + "07051360968462063659891899462054247537745976596872767714180852945844484408053399155613871082139550145579799707241" + "02739077746853707044081281775302910700845395388393628022004086658191413817026937499382994523889310476633341355125" + "36970161512201336865537906990405470959087815470627336860267082642705818724890923485099076407427813196992821890396" + "81414673574614863062987306855746291767811978264371471321046227200851894843139416687953473478884075454592166055730" + "14774895683738184268219531683594714240155669493243656420489173797250259667634963989257812500000001e-1083", + chars_format::scientific, 782, errc{}, 0x1.4258265642fe9p-1022}, + // (1 + 1 - 2 * 2^-53) * 2^1023 exactly {"17976931348623157081452742373170435679807056752584499659891747680315726078002853876058955863276687817154045895351" "43824642343213268894641827684675467035375169860499105765512820762454900903893289440758685084551339423045832369032" diff --git a/tests/std/tests/P0067R5_charconv/test.cpp b/tests/std/tests/P0067R5_charconv/test.cpp index d2d1ba0f37..c06b844479 100644 --- a/tests/std/tests/P0067R5_charconv/test.cpp +++ b/tests/std/tests/P0067R5_charconv/test.cpp @@ -936,6 +936,10 @@ void test_floating_from_chars(const chars_format fmt) { test_from_chars("-000", fmt, 4, errc{}, T{-0.0}); test_from_chars("-0e9999", fmt, 7, errc{}, T{-0.0}); test_from_chars("-0e-9999", fmt, 8, errc{}, T{-0.0}); + test_from_chars("1" + string(10000, '0') + "e-10000", fmt, 10008, errc{}, T{1.0}); + test_from_chars("0." + string(9999, '0') + "1e10000", fmt, 10008, errc{}, T{1.0}); + test_from_chars("1" + string(1280000, '0') + "e-1280000", fmt, 1280010, errc{}, T{1.0}); + test_from_chars("0." + string(1279999, '0') + "1e1280000", fmt, 1280010, errc{}, T{1.0}); test_from_chars("1e9999", fmt, 6, errc::result_out_of_range, inf); test_from_chars("-1e9999", fmt, 7, errc::result_out_of_range, -inf); test_from_chars("1e-9999", fmt, 7, errc::result_out_of_range, T{0}); @@ -978,6 +982,10 @@ void test_floating_from_chars(const chars_format fmt) { test_from_chars("-000", fmt, 4, errc{}, T{-0.0}); test_from_chars("-0p9999", fmt, 7, errc{}, T{-0.0}); test_from_chars("-0p-9999", fmt, 8, errc{}, T{-0.0}); + test_from_chars("1" + string(2500, '0') + "p-10000", fmt, 2508, errc{}, T{1.0}); + test_from_chars("0." + string(2499, '0') + "1p10000", fmt, 2508, errc{}, T{1.0}); + test_from_chars("1" + string(320000, '0') + "p-1280000", fmt, 320010, errc{}, T{1.0}); + test_from_chars("0." + string(319999, '0') + "1p1280000", fmt, 320010, errc{}, T{1.0}); test_from_chars("1p9999", fmt, 6, errc::result_out_of_range, inf); test_from_chars("-1p9999", fmt, 7, errc::result_out_of_range, -inf); test_from_chars("1p-9999", fmt, 7, errc::result_out_of_range, T{0}); From 4549227b507c6c0e279b613a221eab512d2cca23 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Tue, 25 Apr 2023 09:08:55 +0800 Subject: [PATCH 2/5] Adjust comment --- .../tests/P0067R5_charconv/double_from_chars_test_cases.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp b/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp index 8c35af82e3..84bda79f4d 100644 --- a/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp +++ b/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp @@ -1526,7 +1526,8 @@ inline constexpr DoubleFromCharsTestCase double_from_chars_test_cases[] = { "315290680984617210924625396728515625e-308", chars_format::scientific, 721, errc{}, 0x1.0000000000000p-1022}, - // GH-3161: slightly above the midpoint between 0x1.4258265642fe8p-1022 and 0x1.4258265642fe9p-1022 + // GH-3161: ": from_chars, incorrect conversion of tiny doubles, when specified without a fractional part" + // slightly above the midpoint between 0x1.4258265642fe8p-1022 and 0x1.4258265642fe9p-1022, without decimal point {"28017185671564702625986967801367508381305145856029502167789836829722124560807078866183829589255835985468748869481" "86583142362517164634520243360501584015713807233124871337198558567758800235976556111488605578858880932554217965863" "07051360968462063659891899462054247537745976596872767714180852945844484408053399155613871082139550145579799707241" From ca97973be2d1ec4e7b30bca66b0e2c9ff9666597 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Tue, 25 Apr 2023 09:32:38 +0800 Subject: [PATCH 3/5] Delete a superfluous empty line --- stl/inc/charconv | 1 - 1 file changed, 1 deletion(-) diff --git a/stl/inc/charconv b/stl/inc/charconv index 6a231858a8..93bf58fe32 100644 --- a/stl/inc/charconv +++ b/stl/inc/charconv @@ -1732,7 +1732,6 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi const ptrdiff_t _Further_adjustment = (_STD max)(-_Exponent, _Exponent_adjustment); _Exponent += _Further_adjustment; _Exponent_adjustment -= _Further_adjustment; - } } else if (_Exponent < 0 && _Exponent_adjustment > 0) { if (_Is_hexadecimal) { From 92ce7ebd606c27dfd8e94f9e30803026518ca0bf Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 26 Apr 2023 21:08:48 -0700 Subject: [PATCH 4/5] Add newline between non-chained `if`-statements. --- stl/inc/charconv | 1 + 1 file changed, 1 insertion(+) diff --git a/stl/inc/charconv b/stl/inc/charconv index 93bf58fe32..4864d04659 100644 --- a/stl/inc/charconv +++ b/stl/inc/charconv @@ -1753,6 +1753,7 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value); return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999" } + if (_Exponent < _Minimum_temporary_decimal_exponent || _Exponent_adjustment < (_Is_hexadecimal ? _Minimum_temporary_decimal_exponent / 4 : _Minimum_temporary_decimal_exponent)) { From 045834534771905ceb24e7f5395cb6ead9a68a5a Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 26 Apr 2023 21:14:52 -0700 Subject: [PATCH 5/5] Move `_Exponent_adjustment_multiplier` up to simplify code. --- stl/inc/charconv | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/stl/inc/charconv b/stl/inc/charconv index 4864d04659..6099cfd6b4 100644 --- a/stl/inc/charconv +++ b/stl/inc/charconv @@ -1745,27 +1745,25 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi } } + // In hexadecimal floating constants, the exponent is a base 2 exponent. The exponent adjustment computed during + // parsing has the same base as the mantissa (so, 16 for hexadecimal floating constants). + // We therefore need to scale the base 16 multiplier to base 2 by multiplying by log2(16): + const int _Exponent_adjustment_multiplier{_Is_hexadecimal ? 4 : 1}; + // And then _Exponent and _Exponent_adjustment are either both non-negative or both non-positive. // So we can detect out-of-range cases directly. if (_Exponent > _Maximum_temporary_decimal_exponent - || _Exponent_adjustment - > (_Is_hexadecimal ? _Maximum_temporary_decimal_exponent / 4 : _Maximum_temporary_decimal_exponent)) { + || _Exponent_adjustment > _Maximum_temporary_decimal_exponent / _Exponent_adjustment_multiplier) { _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value); return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999" } if (_Exponent < _Minimum_temporary_decimal_exponent - || _Exponent_adjustment - < (_Is_hexadecimal ? _Minimum_temporary_decimal_exponent / 4 : _Minimum_temporary_decimal_exponent)) { + || _Exponent_adjustment < _Minimum_temporary_decimal_exponent / _Exponent_adjustment_multiplier) { _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value); return {_Next, errc::result_out_of_range}; // Underflow example: "1e-9999" } - // In hexadecimal floating constants, the exponent is a base 2 exponent. The exponent adjustment computed during - // parsing has the same base as the mantissa (so, 16 for hexadecimal floating constants). - // We therefore need to scale the base 16 multiplier to base 2 by multiplying by log2(16): - const int _Exponent_adjustment_multiplier{_Is_hexadecimal ? 4 : 1}; - _Exponent += _Exponent_adjustment * _Exponent_adjustment_multiplier; // Verify that after adjustment the exponent isn't wildly out of range (if it is, it isn't representable