From 817f18fb9a58ea323d172bc4228b9e81ad8f6455 Mon Sep 17 00:00:00 2001 From: kimikage Date: Thu, 13 Aug 2020 21:08:24 +0900 Subject: [PATCH] Optimize multiplication for Normed This adds `wrapping_mul`, `saturating_mul` and `checked_mul` binary operations. However, this does not specialize them for `Fixed` and does not change `*` for `Fixed`. This replaces most of Normed's implementation of multiplication with integer operations. This improves the speed in many cases and the accuracy in some cases. --- src/FixedPointNumbers.jl | 6 +++++- src/normed.jl | 42 +++++++++++++++++++++++++++++++++++++--- test/fixed.jl | 30 ++++++++++++++++++++++++++-- test/normed.jl | 29 +++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 6 deletions(-) diff --git a/src/FixedPointNumbers.jl b/src/FixedPointNumbers.jl index 86c17e4f..5c4c3af0 100644 --- a/src/FixedPointNumbers.jl +++ b/src/FixedPointNumbers.jl @@ -189,6 +189,7 @@ float(x::FixedPoint) = convert(floattype(x), x) wrapping_neg(x::X) where {X <: FixedPoint} = X(-x.i, 0) wrapping_add(x::X, y::X) where {X <: FixedPoint} = X(x.i + y.i, 0) wrapping_sub(x::X, y::X) where {X <: FixedPoint} = X(x.i - y.i, 0) +wrapping_mul(x::X, y::X) where {X <: FixedPoint} = (float(x) * float(y)) % X # saturating arithmetic saturating_neg(x::X) where {X <: FixedPoint} = X(~min(x.i - true, x.i), 0) @@ -202,6 +203,8 @@ saturating_sub(x::X, y::X) where {X <: FixedPoint} = X(x.i - ifelse(x.i < 0, min(y.i, x.i - typemin(x.i)), max(y.i, x.i - typemax(x.i))), 0) saturating_sub(x::X, y::X) where {X <: FixedPoint{<:Unsigned}} = X(x.i - min(x.i, y.i), 0) +saturating_mul(x::X, y::X) where {X <: FixedPoint} = clamp(float(x) * float(y), X) + # checked arithmetic checked_neg(x::X) where {X <: FixedPoint} = checked_sub(zero(X), x) function checked_add(x::X, y::X) where {X <: FixedPoint} @@ -216,6 +219,7 @@ function checked_sub(x::X, y::X) where {X <: FixedPoint} f && throw_overflowerror(:-, x, y) z end +checked_mul(x::X, y::X) where {X <: FixedPoint} = 
X(float(x) * float(y)) # default arithmetic const DEFAULT_ARITHMETIC = :wrapping @@ -226,7 +230,7 @@ for (op, name) in ((:-, :neg), ) $op(x::X) where {X <: FixedPoint} = $f(x) end end -for (op, name) in ((:+, :add), (:-, :sub)) +for (op, name) in ((:+, :add), (:-, :sub), (:*, :mul)) f = Symbol(DEFAULT_ARITHMETIC, :_, name) @eval begin $op(x::X, y::X) where {X <: FixedPoint} = $f(x, y) diff --git a/src/normed.jl b/src/normed.jl index 83188107..15c9cfc5 100644 --- a/src/normed.jl +++ b/src/normed.jl @@ -127,7 +127,6 @@ function rem(x::Float64, ::Type{N}) where {f, N <: Normed{UInt64,f}} reinterpret(N, r << UInt8(f - 53) - unsigned(signed(r) >> 0x35)) end - function (::Type{T})(x::Normed) where {T <: AbstractFloat} # The following optimization for constant division may cause rounding errors. # y = reinterpret(x)*(one(rawtype(x))/convert(T, rawone(x))) @@ -248,8 +247,45 @@ Base.BigFloat(x::Normed) = reinterpret(x) / BigFloat(rawone(x)) Base.Rational(x::Normed) = reinterpret(x)//rawone(x) -# unchecked arithmetic -*(x::T, y::T) where {T <: Normed} = convert(T,convert(floattype(T), x)*convert(floattype(T), y)) +# Division by `2^f-1` with RoundNearest. The result would be in the lower half bits. 
+div_2fm1(x::T, ::Val{f}) where {T, f} = (x + (T(1)<<(f - 1) - 0x1)) ÷ (T(1) << f - 0x1) +div_2fm1(x::T, ::Val{1}) where T = x +div_2fm1(x::UInt16, ::Val{8}) = (((x + 0x80) >> 0x8) + x + 0x80) >> 0x8 +div_2fm1(x::UInt32, ::Val{16}) = (((x + 0x8000) >> 0x10) + x + 0x8000) >> 0x10 +div_2fm1(x::UInt64, ::Val{32}) = (((x + 0x80000000) >> 0x20) + x + 0x80000000) >> 0x20 +div_2fm1(x::UInt64, ::Val{64}) = (((x + 0x8000000000000000) >> 0x40) + x + 0x8000000000000000) >> 0x40 + +# wrapping arithmetic +function wrapping_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}} + z = widemul(x.i, y.i) + N(div_2fm1(z, Val(Int(f))) % T, 0) +end + +# saturating arithmetic +function saturating_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}} + f == bitwidth(T) && return wrapping_mul(x, y) + z = min(widemul(x.i, y.i), widemul(typemax(N).i, rawone(N))) + N(div_2fm1(z, Val(Int(f))) % T, 0) +end + +# checked arithmetic +function checked_mul(x::N, y::N) where {N <: Normed} + z = float(x) * float(y) + z < typemax(N) + eps(N)/2 || throw_overflowerror(:*, x, y) + z % N +end +function checked_mul(x::N, y::N) where {T <: Union{UInt8,UInt16,UInt32,UInt64}, f, N <: Normed{T,f}} + f == bitwidth(T) && return wrapping_mul(x, y) + z = widemul(x.i, y.i) + m = widemul(typemax(N).i, rawone(N)) + (rawone(N) >> 0x1) + z < m || throw_overflowerror(:*, x, y) + N(div_2fm1(z, Val(Int(f))) % T, 0) +end + +# TODO: decide the default arithmetic for `Normed` mul +# Override the default arithmetic with `checked` for backward compatibility +*(x::N, y::N) where {N <: Normed} = checked_mul(x, y) + /(x::T, y::T) where {T <: Normed} = convert(T,convert(floattype(T), x)/convert(floattype(T), y)) # Functions diff --git a/test/fixed.jl b/test/fixed.jl index 8f8ad8a6..218949a2 100644 --- a/test/fixed.jl +++ b/test/fixed.jl @@ -343,12 +343,38 @@ end xys = ((x, y) for x in xs, y in xs) fsub(x, y) = float(x) - float(y) @test all(((x, y),) ->
wrapping_add(wrapping_sub(x, y), y) === x, xys) - @test all(((x, y),) -> saturating_sub(x, y) == clamp(fsub(x, y), F), xys) - @test all(((x, y),) -> !(typemin(F) < fsub(x, y) < typemax(F)) || + @test all(((x, y),) -> saturating_sub(x, y) === clamp(fsub(x, y), F), xys) + @test all(((x, y),) -> !(typemin(F) <= fsub(x, y) <= typemax(F)) || wrapping_sub(x, y) === checked_sub(x, y) === fsub(x, y) % F, xys) end end +@testset "mul" begin + for F in target(Fixed; ex = :thin) + @test wrapping_mul(typemax(F), zero(F)) === zero(F) + @test saturating_mul(typemax(F), zero(F)) === zero(F) + @test checked_mul(typemax(F), zero(F)) === zero(F) + + @test wrapping_mul(F(-1), typemax(F)) === -typemax(F) + @test saturating_mul(F(-1), typemax(F)) === -typemax(F) + @test checked_mul(F(-1), typemax(F)) === -typemax(F) + + # FIXME: Both the rhs and lhs of the following test may be inaccurate. + @test_skip wrapping_mul(typemin(F), typemax(F)) === big(typemin(F)) * big(typemax(F)) % F + @test saturating_mul(typemin(F), typemax(F)) === typemin(F) + @test_throws Exception checked_mul(typemin(F), typemax(F)) # TODO: Exception -> OverflowError + end + for F in target(Fixed, :i8; ex = :thin) + xs = typemin(F):eps(F):typemax(F) + xys = ((x, y) for x in xs, y in xs) + fmul(x, y) = float(x) * float(y) # note that precision(Float32) < 32 + @test all(((x, y),) -> wrapping_mul(x, y) === fmul(x, y) % F, xys) + @test all(((x, y),) -> saturating_mul(x, y) === clamp(fmul(x, y), F), xys) + @test all(((x, y),) -> !(typemin(F) <= fmul(x, y) <= typemax(F)) || + wrapping_mul(x, y) === checked_mul(x, y), xys) + end +end + @testset "rounding" begin for sym in (:i8, :i16, :i32, :i64) T = symbol_to_inttype(Fixed, sym) diff --git a/test/normed.jl b/test/normed.jl index f3a1ff20..22b6f375 100644 --- a/test/normed.jl +++ b/test/normed.jl @@ -374,6 +374,35 @@ end end end +@testset "mul" begin + for N in target(Normed; ex = :thin) + @test wrapping_mul(typemax(N), zero(N)) === zero(N) + @test saturating_mul(typemax(N), 
zero(N)) === zero(N) + @test checked_mul(typemax(N), zero(N)) === zero(N) + + @test wrapping_mul(one(N), typemax(N)) === typemax(N) + @test saturating_mul(one(N), typemax(N)) === typemax(N) + @test checked_mul(one(N), typemax(N)) === typemax(N) + + @test wrapping_mul(typemax(N), typemax(N)) === big(typemax(N))^2 % N + @test saturating_mul(typemax(N), typemax(N)) === typemax(N) + if typemax(N) == 1 + @test checked_mul(typemax(N), typemax(N)) === typemax(N) + else + @test_throws OverflowError checked_mul(typemax(N), typemax(N)) + end + end + for N in target(Normed, :i8; ex = :thin) + xs = typemin(N):eps(N):typemax(N) + xys = ((x, y) for x in xs, y in xs) + fmul(x, y) = float(x) * float(y) # note that precision(Float32) < 32 + @test all(((x, y),) -> wrapping_mul(x, y) === fmul(x, y) % N, xys) + @test all(((x, y),) -> saturating_mul(x, y) === clamp(fmul(x, y), N), xys) + @test all(((x, y),) -> !(typemin(N) <= fmul(x, y) <= typemax(N)) || + wrapping_mul(x, y) === checked_mul(x, y), xys) + end +end + @testset "div/fld1" begin @test div(reinterpret(N0f8, 0x10), reinterpret(N0f8, 0x02)) == fld(reinterpret(N0f8, 0x10), reinterpret(N0f8, 0x02)) == 8 @test div(reinterpret(N0f8, 0x0f), reinterpret(N0f8, 0x02)) == fld(reinterpret(N0f8, 0x0f), reinterpret(N0f8, 0x02)) == 7