Skip to content

Commit

Permalink
dec2flt: Refactor float traits
Browse files Browse the repository at this point in the history
A lot of the magic constants can be turned into expressions. This, with
the added documentation, should be more clear, and will also make it
easier to support `f16` and `f128`.
  • Loading branch information
tgross35 committed Dec 9, 2024
1 parent 121edd9 commit 4566132
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 96 deletions.
250 changes: 160 additions & 90 deletions library/core/src/num/dec2flt/float.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,56 @@
//! Helper trait for generic float types.
use core::f64;

use crate::fmt::{Debug, LowerExp};
use crate::num::FpCategory;
use crate::ops::{Add, Div, Mul, Neg};
use crate::ops::{self, Add, Div, Mul, Neg};

/// Conversion of `Self` into `T` by value.
///
/// The implementations generated in this file perform a plain `as` cast, so values that do not
/// fit in the target type are truncated/wrapped rather than reported as errors.
pub trait CastInto<T: Copy>: Copy {
    /// Converts `self` into a `T`.
    fn cast(self) -> T;
}

/// The integer operations the generic float code relies on: shifting by a `u32`, bitwise
/// and/or, equality comparison, narrowing to `i16`, and the constants zero and one.
pub trait Integer:
    Sized
    + Clone
    + Copy
    + Debug
    + PartialEq
    + CastInto<i16>
    + ops::BitAnd<Output = Self>
    + ops::BitOr<Output = Self>
    + ops::Shl<u32, Output = Self>
    + ops::Shr<u32, Output = Self>
{
    /// The value `0` in this integer type.
    const ZERO: Self;
    /// The value `1` in this integer type.
    const ONE: Self;
}

/// Implements `CastInto<i16>` and `Integer` for every primitive integer type in the
/// comma-separated list.
macro_rules! int {
    ($($int:ty),+) => {
        $(
            impl Integer for $int {
                const ZERO: Self = 0;
                const ONE: Self = 1;
            }

            impl CastInto<i16> for $int {
                fn cast(self) -> i16 {
                    // Plain `as` cast: only the low 16 bits are kept.
                    self as i16
                }
            }
        )+
    }
}

int!(u16, u32, u64);

/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
///
/// See the parent module's doc comment for why this is necessary.
///
/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
#[doc(hidden)]
pub trait RawFloat:
Sized
Expand All @@ -24,62 +66,93 @@ pub trait RawFloat:
+ Copy
+ Debug
{
/// The unsigned integer with the same size as the float
type Int: Integer + Into<u64>;

/* general constants */

const INFINITY: Self;
const NEG_INFINITY: Self;
const NAN: Self;
const NEG_NAN: Self;

/// Bit width of the float
const BITS: u32;

/// Mantissa digits including the hidden bit (provided by core)
const MANTISSA_BITS: u32;

const EXPONENT_MASK: Self::Int;
const MANTISSA_MASK: Self::Int;

/// The number of bits in the significand, *excluding* the hidden bit.
const MANTISSA_EXPLICIT_BITS: usize;

// Round-to-even only happens for negative values of q
// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
// the 32-bitcase.
//
// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
//
// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
//
// Thus we have that we only need to round ties to even when
// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
// (in the 32-bit case). In both cases,the power of five(5^|q|)
// fits in a 64-bit word.
const MANTISSA_EXPLICIT_BITS: u32 = Self::MANTISSA_BITS - 1;

/// Bits for the exponent
const EXPONENT_BITS: u32 = Self::BITS - Self::MANTISSA_EXPLICIT_BITS - 1;

/// Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
const MINIMUM_EXPONENT: i32 = -(1 << (Self::EXPONENT_BITS - 1)) + 1;

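/// Largest value of the biased exponent field (all exponent bits set); this is the
/// encoding used for infinity and NaN, and has the same value as `INFINITE_POWER`.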
const MAXIMUM_EXPONENT: u32 = (1 << Self::EXPONENT_BITS) - 1;

/// The exponent bias value
const EXPONENT_BIAS: u32 = Self::MAXIMUM_EXPONENT >> 1;

/// Largest exponent value `(1 << EXP_BITS) - 1`.
const INFINITE_POWER: i32 = (1 << Self::EXPONENT_BITS) - 1;

/// Round-to-even only happens for negative values of q
/// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
/// the 32-bit case.
///
/// When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
/// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
/// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
///
/// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
/// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
/// or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
/// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
/// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
///
/// Thus we have that we only need to round ties to even when
/// we have that q ∈ [−4,23] (in the 64-bit case) or q ∈ [−17,10]
/// (in the 32-bit case). In both cases, the power of five (5^|q|)
/// fits in a 64-bit word.
const MIN_EXPONENT_ROUND_TO_EVEN: i32;
const MAX_EXPONENT_ROUND_TO_EVEN: i32;

// Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
const MIN_EXPONENT_FAST_PATH: i64;

// Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
const MAX_EXPONENT_FAST_PATH: i64;
/// Largest decimal exponent for a non-infinite value.
///
/// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
/// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
const LARGEST_POWER_OF_TEN: i32 =
((Self::EXPONENT_BIAS as f64 + 1.0) / f64::consts::LOG2_10) as i32;

// Maximum exponent that can be represented for a disguised-fast path case.
// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64;
/// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
/// smaller than `10^SMALLEST_POWER_OF_TEN`.
const SMALLEST_POWER_OF_TEN: i32 =
-(((Self::EXPONENT_BIAS + Self::MANTISSA_BITS + 64) as f64) / f64::consts::LOG2_10) as i32;

// Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
const MINIMUM_EXPONENT: i32;
/* Fast pathing */

// Largest exponent value `(1 << EXP_BITS) - 1`.
const INFINITE_POWER: i32;
/// Maximum exponent for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
// assuming FLT_EVAL_METHOD = 0
const MAX_EXPONENT_FAST_PATH: i64 =
((Self::MANTISSA_BITS as f64) / (f64::consts::LOG2_10 - 1.0)) as i64;

// Index (in bits) of the sign.
const SIGN_INDEX: usize;
/// Minimum exponent for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;

// Smallest decimal exponent for a non-zero value.
const SMALLEST_POWER_OF_TEN: i32;
/// Maximum exponent that can be represented for a disguised-fast path case.
/// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
Self::MAX_EXPONENT_FAST_PATH + (Self::MANTISSA_BITS as f64 / f64::consts::LOG2_10) as i64;

// Largest decimal exponent for a non-infinite value.
const LARGEST_POWER_OF_TEN: i32;

// Maximum mantissa for the fast-path (`1 << 53` for f64).
const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_EXPLICIT_BITS;
/// Maximum mantissa for the fast-path (`1 << 53` for f64).
const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::MANTISSA_BITS;

/// Converts integer into float through an as cast.
/// This is only called in the fast-path algorithm, and therefore
Expand All @@ -96,27 +169,45 @@ pub trait RawFloat:
/// Returns the category that this number falls into.
fn classify(self) -> FpCategory;

/// Transmute to the integer representation
fn to_bits(self) -> Self::Int;

/// Returns the mantissa, exponent and sign as integers.
fn integer_decode(self) -> (u64, i16, i8);
///
/// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
/// For 0, the exponent will be `-(EXPONENT_BIAS + MANTISSA_EXPLICIT_BITS)`, which is the
/// minimum subnormal power.
fn integer_decode(self) -> (u64, i16, i8) {
    let raw = self.to_bits();

    // The most significant bit is the sign: clear means positive, set means negative.
    let sign_bit = raw >> (Self::BITS - 1);
    let sign: i8 = if sign_bit == Self::Int::ZERO { 1 } else { -1 };

    // Isolate the exponent field and move it down to the low bits.
    let biased_exponent: i16 = ((raw & Self::EXPONENT_MASK) >> Self::MANTISSA_EXPLICIT_BITS).cast();

    let fraction = raw & Self::MANTISSA_MASK;
    let mantissa = if biased_exponent == 0 {
        // Zero exponent field: there is no hidden bit, the stored fraction is shifted up by one
        // instead.
        fraction << 1
    } else {
        // Otherwise restore the hidden bit just above the stored fraction.
        fraction | (Self::Int::ONE << Self::MANTISSA_EXPLICIT_BITS)
    };

    // Remove the exponent bias and account for the mantissa being returned as an integer.
    let offset = (Self::EXPONENT_BIAS + Self::MANTISSA_EXPLICIT_BITS) as i16;
    (mantissa.into(), biased_exponent - offset, sign)
}
}

impl RawFloat for f32 {
type Int = u32;

const INFINITY: Self = f32::INFINITY;
const NEG_INFINITY: Self = f32::NEG_INFINITY;
const NAN: Self = f32::NAN;
const NEG_NAN: Self = -f32::NAN;

const MANTISSA_EXPLICIT_BITS: usize = 23;
const BITS: u32 = 32;
const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
const MANTISSA_MASK: Self::Int = Self::MAN_MASK;

const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
const MAX_EXPONENT_FAST_PATH: i64 = 10;
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
const MINIMUM_EXPONENT: i32 = -127;
const INFINITE_POWER: i32 = 0xFF;
const SIGN_INDEX: usize = 31;
const SMALLEST_POWER_OF_TEN: i32 = -65;
const LARGEST_POWER_OF_TEN: i32 = 38;

#[inline]
fn from_u64(v: u64) -> Self {
Expand All @@ -136,16 +227,8 @@ impl RawFloat for f32 {
TABLE[exponent & 15]
}

/// Returns the mantissa, exponent and sign as integers.
fn integer_decode(self) -> (u64, i16, i8) {
let bits = self.to_bits();
let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
let mantissa =
if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 };
// Exponent bias + mantissa shift
exponent -= 127 + 23;
(mantissa as u64, exponent, sign)
/// Returns the raw bit pattern of this `f32` as its same-sized unsigned integer.
fn to_bits(self) -> Self::Int {
    // Explicitly call the inherent `f32::to_bits` rather than this trait method.
    f32::to_bits(self)
}

fn classify(self) -> FpCategory {
Expand All @@ -154,22 +237,20 @@ impl RawFloat for f32 {
}

impl RawFloat for f64 {
const INFINITY: Self = f64::INFINITY;
const NEG_INFINITY: Self = f64::NEG_INFINITY;
const NAN: Self = f64::NAN;
const NEG_NAN: Self = -f64::NAN;
type Int = u64;

const INFINITY: Self = Self::INFINITY;
const NEG_INFINITY: Self = Self::NEG_INFINITY;
const NAN: Self = Self::NAN;
const NEG_NAN: Self = -Self::NAN;

const BITS: u32 = 64;
const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
const MANTISSA_MASK: Self::Int = Self::MAN_MASK;

const MANTISSA_EXPLICIT_BITS: usize = 52;
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
const MAX_EXPONENT_FAST_PATH: i64 = 22;
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37;
const MINIMUM_EXPONENT: i32 = -1023;
const INFINITE_POWER: i32 = 0x7FF;
const SIGN_INDEX: usize = 63;
const SMALLEST_POWER_OF_TEN: i32 = -342;
const LARGEST_POWER_OF_TEN: i32 = 308;

#[inline]
fn from_u64(v: u64) -> Self {
Expand All @@ -190,19 +271,8 @@ impl RawFloat for f64 {
TABLE[exponent & 31]
}

/// Returns the mantissa, exponent and sign as integers.
fn integer_decode(self) -> (u64, i16, i8) {
let bits = self.to_bits();
let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
let mantissa = if exponent == 0 {
(bits & 0xfffffffffffff) << 1
} else {
(bits & 0xfffffffffffff) | 0x10000000000000
};
// Exponent bias + mantissa shift
exponent -= 1023 + 52;
(mantissa, exponent, sign)
/// Returns the raw bit pattern of this `f64` as its same-sized unsigned integer.
fn to_bits(self) -> Self::Int {
    // Explicitly call the inherent `f64::to_bits` rather than this trait method.
    f64::to_bits(self)
}

fn classify(self) -> FpCategory {
Expand Down
4 changes: 2 additions & 2 deletions library/core/src/num/dec2flt/lemire.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
// Normalize our significant digits, so the most-significant bit is set.
let lz = w.leading_zeros();
w <<= lz;
let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3);
let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS as usize + 3);
if lo == 0xFFFF_FFFF_FFFF_FFFF {
// If we have failed to approximate w x 5^-q with our 128-bit value.
// Since the addition of 1 could lead to an overflow which could then
Expand Down Expand Up @@ -89,7 +89,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
if lo <= 1
&& q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64
&& q <= F::MAX_EXPONENT_ROUND_TO_EVEN as i64
&& mantissa & 3 == 1
&& mantissa & 0b11 == 0b01
&& (mantissa << (upperbit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3)) == hi
{
// Zero the lowest bit, so we don't round up.
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/num/dec2flt/slow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub(crate) fn parse_long_mantissa<F: RawFloat>(s: &[u8]) -> BiasedFp {
}
// Shift the decimal to the hidden bit, and then round the value
// to get the high mantissa+1 bits.
d.left_shift(F::MANTISSA_EXPLICIT_BITS + 1);
d.left_shift(F::MANTISSA_EXPLICIT_BITS as usize + 1);
let mut mantissa = d.round();
if mantissa >= (1_u64 << (F::MANTISSA_EXPLICIT_BITS + 1)) {
// Rounding up overflowed to the carry bit, need to
Expand Down
6 changes: 3 additions & 3 deletions src/etc/test-float-parse/src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,12 @@ pub trait Float:
}

macro_rules! impl_float {
($($fty:ty, $ity:ty, $bits:literal);+) => {
($($fty:ty, $ity:ty);+) => {
$(
impl Float for $fty {
type Int = $ity;
type SInt = <Self::Int as Int>::Signed;
const BITS: u32 = $bits;
const BITS: u32 = <$ity>::BITS;
const MAN_BITS: u32 = Self::MANTISSA_DIGITS - 1;
const MAN_MASK: Self::Int = (Self::Int::ONE << Self::MAN_BITS) - Self::Int::ONE;
const SIGN_MASK: Self::Int = Self::Int::ONE << (Self::BITS-1);
Expand All @@ -168,7 +168,7 @@ macro_rules! impl_float {
}
}

impl_float!(f32, u32, 32; f64, u64, 64);
impl_float!(f32, u32; f64, u64);

/// A test generator. Should provide an iterator that produces unique patterns to parse.
///
Expand Down

0 comments on commit 4566132

Please sign in to comment.