From 5ca8669107049c68908da17a83559879e802acd9 Mon Sep 17 00:00:00 2001 From: kimikage Date: Mon, 29 Jun 2020 18:19:32 +0900 Subject: [PATCH 1/5] Optimize integer-->string conversions This avoids invalidations caused by invalidating `StringVector(::Integer)`. This also makes `bin()`, `dec()` and `hex()` slightly faster, but does not change the Printf. --- base/intfuncs.jl | 97 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 30 deletions(-) diff --git a/base/intfuncs.jl b/base/intfuncs.jl index de790df019281..3f88f844e603b 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -616,11 +616,26 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba ## integer to string functions ## function bin(x::Unsigned, pad::Integer, neg::Bool) - i = neg + max(pad,sizeof(x)<<3-leading_zeros(x)) - a = StringVector(i) + m = 8sizeof(x) - leading_zeros(x)::Int + n = neg + max((pad % Int)::Int, m) + a = StringVector(n) + # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes + # @inbounds a[n - i] = 0x30 + (((x >> i) % UInt8)::UInt8 & 0x1) + # end + i = n + @inbounds while i >= 4 + b = UInt32((x % UInt8)::UInt8) + d = 0x30303030 + ((b * 0x08040201) >> 0x3) & 0x01010101 + a[i-3] = (d >> 0x00) % UInt8 + a[i-2] = (d >> 0x08) % UInt8 + a[i-1] = (d >> 0x10) % UInt8 + a[i] = (d >> 0x18) % UInt8 + x >>= 0x4 + i -= 4 + end while i > neg - @inbounds a[i] = 48+(x&0x1) - x >>= 1 + @inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x1) + x >>= 0x1 i -= 1 end if neg; @inbounds a[1]=0x2d; end @@ -628,62 +643,84 @@ function bin(x::Unsigned, pad::Integer, neg::Bool) end function oct(x::Unsigned, pad::Integer, neg::Bool) - i = neg + max(pad,div((sizeof(x)<<3)-leading_zeros(x)+2,3)) - a = StringVector(i) + m = div(8sizeof(x) - leading_zeros(x)::Int + 2, 3) + n = neg + max((pad % Int)::Int, m) + a = StringVector(n) + i = n while i > neg - @inbounds a[i] = 48+(x&0x7) - x >>= 3 + @inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 
0x7) + x >>= 0x3 i -= 1 end if neg; @inbounds a[1]=0x2d; end String(a) end +# 2-digit decimal characters ("00":"99") +const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] + function dec(x::Unsigned, pad::Integer, neg::Bool) - i = neg + ndigits(x, base=10, pad=pad) - a = StringVector(i) - while i > neg - @inbounds a[i] = 48+rem(x,10) - x = oftype(x,div(x,10)) - i -= 1 + n = neg + ndigits(x, base=10, pad=(pad % Int)::Int)::Int + a = StringVector(n) + i = n + @inbounds while i >= 2 + d, r = divrem(x, 0x64) + d100 = _dec_d100[(r % Int)::Int + 1] + a[i-1] = d100 % UInt8 + a[i] = (d100 >> 0x8) % UInt8 + x = oftype(x, d) + i -= 2 + end + if i > neg + @inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 end if neg; @inbounds a[1]=0x2d; end String(a) end function hex(x::Unsigned, pad::Integer, neg::Bool) - i = neg + max(pad,(sizeof(x)<<1)-(leading_zeros(x)>>2)) - a = StringVector(i) - while i > neg - d = x & 0xf - @inbounds a[i] = 48+d+39*(d>9) - x >>= 4 - i -= 1 + m = 2sizeof(x) - (leading_zeros(x)::Int >> 2) + n = neg + max((pad % Int)::Int, m) + a = StringVector(n) + i = n + while i >= 2 + b = (x % UInt8)::UInt8 + d1, d2 = b >> 0x4, b & 0xf + @inbounds a[i-1] = d1 + ifelse(d1 > 0x9, 0x57, 0x30) + @inbounds a[i] = d2 + ifelse(d2 > 0x9, 0x57, 0x30) + x >>= 0x8 + i -= 2 + end + if i > neg + d = (x % UInt8)::UInt8 & 0xf + @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end if neg; @inbounds a[1]=0x2d; end String(a) end -const base36digits = ['0':'9';'a':'z'] -const base62digits = ['0':'9';'A':'Z';'a':'z'] +const base36digits = UInt8['0':'9';'a':'z'] +const base62digits = UInt8['0':'9';'A':'Z';'a':'z'] -function _base(b::Integer, x::Integer, pad::Integer, neg::Bool) - (x >= 0) | (b < 0) || throw(DomainError(x, "For negative `x`, `b` must be negative.")) +function _base(base::Integer, x::Integer, pad::Integer, neg::Bool) + b = (base % Int)::Int + (x >= 0) | (b < 0) || throw(DomainError(x, "For negative `x`, `base` must be negative.")) 2 <= abs(b) <= 
62 || throw(DomainError(b, "base must satisfy 2 ≤ abs(base) ≤ 62")) digits = abs(b) <= 36 ? base36digits : base62digits - i = neg + ndigits(x, base=b, pad=pad) - a = StringVector(i) + n = neg + ndigits(x, base=b, pad=(pad % Int)::Int)::Int + a = StringVector(n) + i = n @inbounds while i > neg if b > 0 - a[i] = digits[1+rem(x,b)] + a[i] = digits[1 + (rem(x, b) % Int)::Int] x = div(x,b) else - a[i] = digits[1+mod(x,-b)] + a[i] = digits[1 + (mod(x, -b) % Int)::Int] x = cld(x,b) end i -= 1 end - if neg; a[1]='-'; end + if neg; @inbounds a[1]=0x2d; end String(a) end From 6e06b3d7edb71f8bd626ad032129182a1173e242 Mon Sep 17 00:00:00 2001 From: kimikage Date: Thu, 2 Jul 2020 12:39:03 +0900 Subject: [PATCH 2/5] Change `pad` option handling --- base/intfuncs.jl | 25 +++++++++++++------------ test/intfuncs.jl | 1 + 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 3f88f844e603b..84c5bfff951a2 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -615,9 +615,9 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba ## integer to string functions ## -function bin(x::Unsigned, pad::Integer, neg::Bool) +function bin(x::Unsigned, pad::Int, neg::Bool) m = 8sizeof(x) - leading_zeros(x)::Int - n = neg + max((pad % Int)::Int, m) + n = neg + max(pad, m) a = StringVector(n) # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes # @inbounds a[n - i] = 0x30 + (((x >> i) % UInt8)::UInt8 & 0x1) @@ -642,9 +642,9 @@ function bin(x::Unsigned, pad::Integer, neg::Bool) String(a) end -function oct(x::Unsigned, pad::Integer, neg::Bool) +function oct(x::Unsigned, pad::Int, neg::Bool) m = div(8sizeof(x) - leading_zeros(x)::Int + 2, 3) - n = neg + max((pad % Int)::Int, m) + n = neg + max(pad, m) a = StringVector(n) i = n while i > neg @@ -659,8 +659,8 @@ end # 2-digit decimal characters ("00":"99") const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] -function 
dec(x::Unsigned, pad::Integer, neg::Bool) - n = neg + ndigits(x, base=10, pad=(pad % Int)::Int)::Int +function dec(x::Unsigned, pad::Int, neg::Bool) + n = neg + (ndigits(x, base=10, pad=pad) % Int)::Int a = StringVector(n) i = n @inbounds while i >= 2 @@ -678,9 +678,9 @@ function dec(x::Unsigned, pad::Integer, neg::Bool) String(a) end -function hex(x::Unsigned, pad::Integer, neg::Bool) +function hex(x::Unsigned, pad::Int, neg::Bool) m = 2sizeof(x) - (leading_zeros(x)::Int >> 2) - n = neg + max((pad % Int)::Int, m) + n = neg + max(pad, m) a = StringVector(n) i = n while i >= 2 @@ -702,12 +702,12 @@ end const base36digits = UInt8['0':'9';'a':'z'] const base62digits = UInt8['0':'9';'A':'Z';'a':'z'] -function _base(base::Integer, x::Integer, pad::Integer, neg::Bool) +function _base(base::Integer, x::Integer, pad::Int, neg::Bool) + (x >= 0) | (base < 0) || throw(DomainError(x, "For negative `x`, `base` must be negative.")) + 2 <= abs(base) <= 62 || throw(DomainError(base, "base must satisfy 2 ≤ abs(base) ≤ 62")) b = (base % Int)::Int - (x >= 0) | (b < 0) || throw(DomainError(x, "For negative `x`, `base` must be negative.")) - 2 <= abs(b) <= 62 || throw(DomainError(b, "base must satisfy 2 ≤ abs(base) ≤ 62")) digits = abs(b) <= 36 ? 
base36digits : base62digits - n = neg + ndigits(x, base=b, pad=(pad % Int)::Int)::Int + n = neg + (ndigits(x, base=b, pad=pad) % Int)::Int a = StringVector(n) i = n @inbounds while i > neg @@ -742,6 +742,7 @@ julia> string(13, base = 5, pad = 4) ``` """ function string(n::Integer; base::Integer = 10, pad::Integer = 1) + pad = (min(max(pad, typemin(Int)), typemax(Int)) % Int)::Int if base == 2 (n_positive, neg) = split_sign(n) bin(n_positive, pad, neg) diff --git a/test/intfuncs.jl b/test/intfuncs.jl index bf992d9f88877..72250d689dde3 100644 --- a/test/intfuncs.jl +++ b/test/intfuncs.jl @@ -304,6 +304,7 @@ end @test string(3, base = 2) == "11" @test string(3, pad = 2, base = 2) == "11" @test string(3, pad = Int32(2), base = Int32(2)) == "11" + @test string(3, pad = typemin(Int128) + 3, base = 0x2) == "11" @test string(3, pad = 3, base = 2) == "011" @test string(-3, base = 2) == "-11" @test string(-3, pad = 3, base = 2) == "-011" From ba0839e5e9b364067104c7ad519d472fea50f6f8 Mon Sep 17 00:00:00 2001 From: kimikage Date: Sun, 9 Aug 2020 01:08:37 +0900 Subject: [PATCH 3/5] Omit unnecessary type assertions --- base/intfuncs.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 84c5bfff951a2..58a839a0af16c 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -616,7 +616,7 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba ## integer to string functions ## function bin(x::Unsigned, pad::Int, neg::Bool) - m = 8sizeof(x) - leading_zeros(x)::Int + m = 8sizeof(x) - leading_zeros(x) n = neg + max(pad, m) a = StringVector(n) # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes @@ -643,7 +643,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) end function oct(x::Unsigned, pad::Int, neg::Bool) - m = div(8sizeof(x) - leading_zeros(x)::Int + 2, 3) + m = div(8sizeof(x) - leading_zeros(x) + 2, 3) n = neg + max(pad, m) a = StringVector(n) i = n @@ -660,7 
+660,7 @@ end const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] function dec(x::Unsigned, pad::Int, neg::Bool) - n = neg + (ndigits(x, base=10, pad=pad) % Int)::Int + n = neg + ndigits(x, pad=pad) a = StringVector(n) i = n @inbounds while i >= 2 @@ -679,7 +679,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool) end function hex(x::Unsigned, pad::Int, neg::Bool) - m = 2sizeof(x) - (leading_zeros(x)::Int >> 2) + m = 2sizeof(x) - (leading_zeros(x) >> 2) n = neg + max(pad, m) a = StringVector(n) i = n @@ -707,7 +707,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) 2 <= abs(base) <= 62 || throw(DomainError(base, "base must satisfy 2 ≤ abs(base) ≤ 62")) b = (base % Int)::Int digits = abs(b) <= 36 ? base36digits : base62digits - n = neg + (ndigits(x, base=b, pad=pad) % Int)::Int + n = neg + ndigits(x, base=b, pad=pad) a = StringVector(n) i = n @inbounds while i > neg From bd69475d44f6d991cd7115305d9fde9d1c0bd037 Mon Sep 17 00:00:00 2001 From: kimikage Date: Sun, 9 Aug 2020 08:23:09 +0900 Subject: [PATCH 4/5] Add out-of-range `base` tests --- test/intfuncs.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/intfuncs.jl b/test/intfuncs.jl index 72250d689dde3..bbe3928a9be50 100644 --- a/test/intfuncs.jl +++ b/test/intfuncs.jl @@ -339,6 +339,8 @@ end @test digits(-3, base = 2) == -[1, 1] @test digits(-42, base = 4) == -[2, 2, 2] + @test_throws DomainError string(5, base = typemin(Int128) + 10) + @testset "digits/base with bases powers of 2" begin @test digits(4, base = 2) == [0, 0, 1] @test digits(5, base = Int32(2), pad=Int32(3)) == [1, 0, 1] From 45582ecc4ef1d640023148027e51223b604fa6bf Mon Sep 17 00:00:00 2001 From: kimikage Date: Sat, 26 Sep 2020 21:00:54 +0900 Subject: [PATCH 5/5] Modify multiplication notation --- base/intfuncs.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 58a839a0af16c..6361a0a795489 100644 --- a/base/intfuncs.jl +++ 
b/base/intfuncs.jl @@ -616,7 +616,7 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba ## integer to string functions ## function bin(x::Unsigned, pad::Int, neg::Bool) - m = 8sizeof(x) - leading_zeros(x) + m = 8 * sizeof(x) - leading_zeros(x) n = neg + max(pad, m) a = StringVector(n) # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes @@ -643,7 +643,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) end function oct(x::Unsigned, pad::Int, neg::Bool) - m = div(8sizeof(x) - leading_zeros(x) + 2, 3) + m = div(8 * sizeof(x) - leading_zeros(x) + 2, 3) n = neg + max(pad, m) a = StringVector(n) i = n @@ -679,7 +679,7 @@ function dec(x::Unsigned, pad::Int, neg::Bool) end function hex(x::Unsigned, pad::Int, neg::Bool) - m = 2sizeof(x) - (leading_zeros(x) >> 2) + m = 2 * sizeof(x) - (leading_zeros(x) >> 2) n = neg + max(pad, m) a = StringVector(n) i = n