From 36b724de043301042d43d0d6bb4194c77b141605 Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34@live.cn>
Date: Tue, 25 Apr 2023 07:36:26 +0800
Subject: [PATCH 1/5] Overhaul floating-point `from_chars`

- handle non-zero tail digits in the integral part
- change the calculation of the exponent part
---
 stl/inc/charconv                              | 71 ++++++++++++++-----
 .../double_from_chars_test_cases.hpp          | 10 +++
 tests/std/tests/P0067R5_charconv/test.cpp     |  8 +++
 3 files changed, 73 insertions(+), 16 deletions(-)
diff --git a/stl/inc/charconv b/stl/inc/charconv
index f2326f7e15..6a231858a8 100644
--- a/stl/inc/charconv
+++ b/stl/inc/charconv
@@ -1579,6 +1579,8 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
     }
     const char* const _Leading_zero_end = _Next;
 
+    bool _Has_zero_tail = true;
+
     // Scan the integer part of the mantissa:
     for (; _Next != _Last; ++_Next) {
         const unsigned char _Digit_value = _Digit_from_char(*_Next);
@@ -1589,20 +1591,18 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
 
         if (_Mantissa_it != _Mantissa_last) {
             *_Mantissa_it++ = _Digit_value;
+        } else {
+            _Has_zero_tail = _Has_zero_tail && _Digit_value == 0;
         }
     }
     const char* const _Whole_end = _Next;
 
-    // Defend against _Exponent_adjustment integer overflow. (These values don't need to be strict.)
-    constexpr ptrdiff_t _Maximum_adjustment = 1'000'000;
-    constexpr ptrdiff_t _Minimum_adjustment = -1'000'000;
-
     // The exponent adjustment holds the number of digits in the mantissa buffer that appeared before the radix point.
     // It can be negative, and leading zeroes in the integer part are ignored. Examples:
     // For "03333.111", it is 4.
     // For "00000.111", it is 0.
     // For "00000.001", it is -2.
-    int _Exponent_adjustment = static_cast<int>((_STD min)(_Whole_end - _Leading_zero_end, _Maximum_adjustment));
+    ptrdiff_t _Exponent_adjustment = _Whole_end - _Leading_zero_end;
 
     // [_Whole_end, _Dot_end) will contain 0 or 1 '.' characters
     if (_Next != _Last && *_Next == '.') {
@@ -1618,12 +1618,10 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
         for (; _Next != _Last && *_Next == '0'; ++_Next) {
         }
 
-        _Exponent_adjustment = static_cast<int>((_STD max)(_Dot_end - _Next, _Minimum_adjustment));
+        _Exponent_adjustment = _Dot_end - _Next;
     }
 
     // Scan the fractional part of the mantissa:
-    bool _Has_zero_tail = true;
-
     for (; _Next != _Last; ++_Next) {
         const unsigned char _Digit_value = _Digit_from_char(*_Next);
 
@@ -1647,7 +1645,8 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
     const char _Exponent_prefix{_Is_hexadecimal ? 'p' : 'e'};
 
     bool _Exponent_is_negative = false;
-    int _Exponent              = 0;
+    bool _Exp_abs_too_large    = false;
+    ptrdiff_t _Exponent        = 0;
 
     constexpr int _Maximum_temporary_decimal_exponent = 5200;
     constexpr int _Minimum_temporary_decimal_exponent = -5200;
@@ -1672,8 +1671,10 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
 
             // found decimal digit
 
-            if (_Exponent <= _Maximum_temporary_decimal_exponent) {
+            if (_Exponent < PTRDIFF_MAX / 10 || (_Exponent == PTRDIFF_MAX / 10 && _Digit_value <= PTRDIFF_MAX % 10)) {
                 _Exponent = _Exponent * 10 + _Digit_value;
+            } else {
+                _Exp_abs_too_large = true;
             }
 
             ++_Unread;
@@ -1710,14 +1711,52 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
         return {_Next, errc{}};
     }
 
-    // Before we adjust the exponent, handle the case where we detected a wildly
-    // out of range exponent during parsing and clamped the value:
-    if (_Exponent > _Maximum_temporary_decimal_exponent) {
+    // Handle exponent of an overly large absolute value.
+    if (_Exp_abs_too_large) {
+        if (_Exponent > 0) {
+            _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
+            return {_Next, errc::result_out_of_range};
+        } else {
+            _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
+            return {_Next, errc::result_out_of_range};
+        }
+    }
+
+    // Adjust _Exponent and _Exponent_adjustment when they have different signedness to avoid overflow.
+    if (_Exponent > 0 && _Exponent_adjustment < 0) {
+        if (_Is_hexadecimal) {
+            const ptrdiff_t _Further_adjustment = (_STD max)(-((_Exponent - 1) / 4 + 1), _Exponent_adjustment);
+            _Exponent += _Further_adjustment * 4;
+            _Exponent_adjustment -= _Further_adjustment;
+        } else {
+            const ptrdiff_t _Further_adjustment = (_STD max)(-_Exponent, _Exponent_adjustment);
+            _Exponent += _Further_adjustment;
+            _Exponent_adjustment -= _Further_adjustment;
+
+        }
+    } else if (_Exponent < 0 && _Exponent_adjustment > 0) {
+        if (_Is_hexadecimal) {
+            const ptrdiff_t _Further_adjustment = (_STD min)((-_Exponent - 1) / 4 + 1, _Exponent_adjustment);
+            _Exponent += _Further_adjustment * 4;
+            _Exponent_adjustment -= _Further_adjustment;
+        } else {
+            const ptrdiff_t _Further_adjustment = (_STD min)(-_Exponent, _Exponent_adjustment);
+            _Exponent += _Further_adjustment;
+            _Exponent_adjustment -= _Further_adjustment;
+        }
+    }
+
+    // And then _Exponent and _Exponent_adjustment are either both non-negative or both non-positive.
+    // So we can detect out-of-range cases directly.
+    if (_Exponent > _Maximum_temporary_decimal_exponent
+        || _Exponent_adjustment
+               > (_Is_hexadecimal ? _Maximum_temporary_decimal_exponent / 4 : _Maximum_temporary_decimal_exponent)) {
         _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
         return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999"
     }
-
-    if (_Exponent < _Minimum_temporary_decimal_exponent) {
+    if (_Exponent < _Minimum_temporary_decimal_exponent
+        || _Exponent_adjustment
+               < (_Is_hexadecimal ? _Minimum_temporary_decimal_exponent / 4 : _Minimum_temporary_decimal_exponent)) {
         _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
         return {_Next, errc::result_out_of_range}; // Underflow example: "1e-9999"
     }
@@ -1741,7 +1780,7 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
         return {_Next, errc::result_out_of_range}; // Underflow example: "0.001e-5199"
     }
 
-    _Fp_string._Myexponent       = _Exponent;
+    _Fp_string._Myexponent       = static_cast<int32_t>(_Exponent);
     _Fp_string._Mymantissa_count = static_cast<uint32_t>(_Mantissa_it - _Mantissa_first);
 
     if (_Is_hexadecimal) {
diff --git a/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp b/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp
index 40c4778f6e..8c35af82e3 100644
--- a/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp
+++ b/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp
@@ -1526,6 +1526,16 @@ inline constexpr DoubleFromCharsTestCase double_from_chars_test_cases[] = {
      "315290680984617210924625396728515625e-308",
         chars_format::scientific, 721, errc{}, 0x1.0000000000000p-1022},
 
+    // GH-3161: slightly above the midpoint between 0x1.4258265642fe8p-1022 and 0x1.4258265642fe9p-1022
+    {"28017185671564702625986967801367508381305145856029502167789836829722124560807078866183829589255835985468748869481"
+     "86583142362517164634520243360501584015713807233124871337198558567758800235976556111488605578858880932554217965863"
+     "07051360968462063659891899462054247537745976596872767714180852945844484408053399155613871082139550145579799707241"
+     "02739077746853707044081281775302910700845395388393628022004086658191413817026937499382994523889310476633341355125"
+     "36970161512201336865537906990405470959087815470627336860267082642705818724890923485099076407427813196992821890396"
+     "81414673574614863062987306855746291767811978264371471321046227200851894843139416687953473478884075454592166055730"
+     "14774895683738184268219531683594714240155669493243656420489173797250259667634963989257812500000001e-1083",
+        chars_format::scientific, 782, errc{}, 0x1.4258265642fe9p-1022},
+
     // (1 + 1 - 2 * 2^-53) * 2^1023 exactly
     {"17976931348623157081452742373170435679807056752584499659891747680315726078002853876058955863276687817154045895351"
      "43824642343213268894641827684675467035375169860499105765512820762454900903893289440758685084551339423045832369032"
diff --git a/tests/std/tests/P0067R5_charconv/test.cpp b/tests/std/tests/P0067R5_charconv/test.cpp
index d2d1ba0f37..c06b844479 100644
--- a/tests/std/tests/P0067R5_charconv/test.cpp
+++ b/tests/std/tests/P0067R5_charconv/test.cpp
@@ -936,6 +936,10 @@ void test_floating_from_chars(const chars_format fmt) {
         test_from_chars<T>("-000", fmt, 4, errc{}, T{-0.0});
         test_from_chars<T>("-0e9999", fmt, 7, errc{}, T{-0.0});
         test_from_chars<T>("-0e-9999", fmt, 8, errc{}, T{-0.0});
+        test_from_chars<T>("1" + string(10000, '0') + "e-10000", fmt, 10008, errc{}, T{1.0});
+        test_from_chars<T>("0." + string(9999, '0') + "1e10000", fmt, 10008, errc{}, T{1.0});
+        test_from_chars<T>("1" + string(1280000, '0') + "e-1280000", fmt, 1280010, errc{}, T{1.0});
+        test_from_chars<T>("0." + string(1279999, '0') + "1e1280000", fmt, 1280010, errc{}, T{1.0});
         test_from_chars<T>("1e9999", fmt, 6, errc::result_out_of_range, inf);
         test_from_chars<T>("-1e9999", fmt, 7, errc::result_out_of_range, -inf);
         test_from_chars<T>("1e-9999", fmt, 7, errc::result_out_of_range, T{0});
@@ -978,6 +982,10 @@ void test_floating_from_chars(const chars_format fmt) {
         test_from_chars<T>("-000", fmt, 4, errc{}, T{-0.0});
         test_from_chars<T>("-0p9999", fmt, 7, errc{}, T{-0.0});
         test_from_chars<T>("-0p-9999", fmt, 8, errc{}, T{-0.0});
+        test_from_chars<T>("1" + string(2500, '0') + "p-10000", fmt, 2508, errc{}, T{1.0});
+        test_from_chars<T>("0." + string(2499, '0') + "1p10000", fmt, 2508, errc{}, T{1.0});
+        test_from_chars<T>("1" + string(320000, '0') + "p-1280000", fmt, 320010, errc{}, T{1.0});
+        test_from_chars<T>("0." + string(319999, '0') + "1p1280000", fmt, 320010, errc{}, T{1.0});
         test_from_chars<T>("1p9999", fmt, 6, errc::result_out_of_range, inf);
         test_from_chars<T>("-1p9999", fmt, 7, errc::result_out_of_range, -inf);
         test_from_chars<T>("1p-9999", fmt, 7, errc::result_out_of_range, T{0});

From 4549227b507c6c0e279b613a221eab512d2cca23 Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34@live.cn>
Date: Tue, 25 Apr 2023 09:08:55 +0800
Subject: [PATCH 2/5] Adjust comment

---
 .../tests/P0067R5_charconv/double_from_chars_test_cases.hpp    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp b/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp
index 8c35af82e3..84bda79f4d 100644
--- a/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp
+++ b/tests/std/tests/P0067R5_charconv/double_from_chars_test_cases.hpp
@@ -1526,7 +1526,8 @@ inline constexpr DoubleFromCharsTestCase double_from_chars_test_cases[] = {
      "315290680984617210924625396728515625e-308",
         chars_format::scientific, 721, errc{}, 0x1.0000000000000p-1022},
 
-    // GH-3161: slightly above the midpoint between 0x1.4258265642fe8p-1022 and 0x1.4258265642fe9p-1022
+    // GH-3161: "<charconv>: from_chars, incorrect conversion of tiny doubles, when specified without a fractional part"
+    // slightly above the midpoint between 0x1.4258265642fe8p-1022 and 0x1.4258265642fe9p-1022, without decimal point
     {"28017185671564702625986967801367508381305145856029502167789836829722124560807078866183829589255835985468748869481"
      "86583142362517164634520243360501584015713807233124871337198558567758800235976556111488605578858880932554217965863"
      "07051360968462063659891899462054247537745976596872767714180852945844484408053399155613871082139550145579799707241"

From ca97973be2d1ec4e7b30bca66b0e2c9ff9666597 Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34@live.cn>
Date: Tue, 25 Apr 2023 09:32:38 +0800
Subject: [PATCH 3/5] Delete a superfluous empty line

---
 stl/inc/charconv | 1 -
 1 file changed, 1 deletion(-)

diff --git a/stl/inc/charconv b/stl/inc/charconv
index 6a231858a8..93bf58fe32 100644
--- a/stl/inc/charconv
+++ b/stl/inc/charconv
@@ -1732,7 +1732,6 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
             const ptrdiff_t _Further_adjustment = (_STD max)(-_Exponent, _Exponent_adjustment);
             _Exponent += _Further_adjustment;
             _Exponent_adjustment -= _Further_adjustment;
-
         }
     } else if (_Exponent < 0 && _Exponent_adjustment > 0) {
         if (_Is_hexadecimal) {

From 92ce7ebd606c27dfd8e94f9e30803026518ca0bf Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Wed, 26 Apr 2023 21:08:48 -0700
Subject: [PATCH 4/5] Add newline between non-chained `if`-statements.

---
 stl/inc/charconv | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stl/inc/charconv b/stl/inc/charconv
index 93bf58fe32..4864d04659 100644
--- a/stl/inc/charconv
+++ b/stl/inc/charconv
@@ -1753,6 +1753,7 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
         _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
         return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999"
     }
+
     if (_Exponent < _Minimum_temporary_decimal_exponent
         || _Exponent_adjustment
                < (_Is_hexadecimal ? _Minimum_temporary_decimal_exponent / 4 : _Minimum_temporary_decimal_exponent)) {

From 045834534771905ceb24e7f5395cb6ead9a68a5a Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@microsoft.com>
Date: Wed, 26 Apr 2023 21:14:52 -0700
Subject: [PATCH 5/5] Move `_Exponent_adjustment_multiplier` up to simplify
 code.

---
 stl/inc/charconv | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/stl/inc/charconv b/stl/inc/charconv
index 4864d04659..6099cfd6b4 100644
--- a/stl/inc/charconv
+++ b/stl/inc/charconv
@@ -1745,27 +1745,25 @@ _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _Fi
         }
     }
 
+    // In hexadecimal floating constants, the exponent is a base 2 exponent. The exponent adjustment computed during
+    // parsing has the same base as the mantissa (so, 16 for hexadecimal floating constants).
+    // We therefore need to scale the base 16 multiplier to base 2 by multiplying by log2(16):
+    const int _Exponent_adjustment_multiplier{_Is_hexadecimal ? 4 : 1};
+
     // And then _Exponent and _Exponent_adjustment are either both non-negative or both non-positive.
     // So we can detect out-of-range cases directly.
     if (_Exponent > _Maximum_temporary_decimal_exponent
-        || _Exponent_adjustment
-               > (_Is_hexadecimal ? _Maximum_temporary_decimal_exponent / 4 : _Maximum_temporary_decimal_exponent)) {
+        || _Exponent_adjustment > _Maximum_temporary_decimal_exponent / _Exponent_adjustment_multiplier) {
         _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
         return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999"
     }
 
     if (_Exponent < _Minimum_temporary_decimal_exponent
-        || _Exponent_adjustment
-               < (_Is_hexadecimal ? _Minimum_temporary_decimal_exponent / 4 : _Minimum_temporary_decimal_exponent)) {
+        || _Exponent_adjustment < _Minimum_temporary_decimal_exponent / _Exponent_adjustment_multiplier) {
         _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
         return {_Next, errc::result_out_of_range}; // Underflow example: "1e-9999"
     }
 
-    // In hexadecimal floating constants, the exponent is a base 2 exponent. The exponent adjustment computed during
-    // parsing has the same base as the mantissa (so, 16 for hexadecimal floating constants).
-    // We therefore need to scale the base 16 multiplier to base 2 by multiplying by log2(16):
-    const int _Exponent_adjustment_multiplier{_Is_hexadecimal ? 4 : 1};
-
     _Exponent += _Exponent_adjustment * _Exponent_adjustment_multiplier;
 
     // Verify that after adjustment the exponent isn't wildly out of range (if it is, it isn't representable