Skip to content

Commit

Permalink
Fix ascii and utf8
Browse files Browse the repository at this point in the history
Make conversion from a C pointer to `ByteString`, `ASCIIString` and `UTF8String`
more efficient and consistent.
  • Loading branch information
yuyichao committed Apr 6, 2016
1 parent 1461bcb commit 732a416
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 10 deletions.
11 changes: 9 additions & 2 deletions base/ascii.jl
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,15 @@ convert(::Type{ASCIIString}, a::Vector{UInt8}) = begin
return ASCIIString(a)
end

# Pre-change definitions as shown in the diff; both signatures are defined
# again directly below.
# NUL-terminated C string: goes through `bytestring(p)` and wraps the result
# in an `ASCIIString`.
ascii(p::Ptr{UInt8}) = ASCIIString(bytestring(p))
# Pointer plus explicit length: builds an array from the pointer with
# `pointer_to_array` and delegates to the array method of `ascii`.
ascii(p::Ptr{UInt8}, len::Integer) = ascii(pointer_to_array(p, len))
# Convert the NUL-terminated C string at `p` to an `ASCIIString`.
# The length is measured with C `strlen`. A NULL `p` is never passed to
# `strlen`; it is forwarded with length 0 to the two-argument method instead.
function ascii(p::Ptr{UInt8})
    if p == C_NULL
        return ascii(p, Csize_t(0))
    end
    return ascii(p, ccall(:strlen, Csize_t, (Ptr{UInt8},), p))
end
# Convert `len` bytes starting at `p` to an `ASCIIString`.
# Throws `ArgumentError` when `p` is NULL or when the bytes are not valid ASCII.
function ascii(p::Ptr{UInt8}, len::Integer)
    if p == C_NULL
        throw(ArgumentError("cannot convert NULL to string"))
    end
    # The runtime helper hands the bytes back as a `Vector{UInt8}`.
    bytes = ccall(:jl_pchar_to_array, Vector{UInt8}, (Ptr{UInt8}, Csize_t), p, len)
    if !isvalid(ASCIIString, bytes)
        throw(ArgumentError("invalid ASCII sequence"))
    end
    return ASCIIString(bytes)
end

function convert(::Type{ASCIIString}, a::Array{UInt8,1}, invalids_as::ASCIIString)
l = length(a)
Expand Down
5 changes: 4 additions & 1 deletion base/pointer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ unsafe_convert{T}(::Type{Ptr{T}}, a::Array{T}) = ccall(:jl_array_ptr, Ptr{T}, (A
unsafe_convert(::Type{Ptr{Void}}, a::Array) = ccall(:jl_array_ptr, Ptr{Void}, (Any,), a)

# unsafe pointer to array conversions
# One-dimensional convenience form: wraps `d` in a 1-tuple and forwards to the
# tuple-dims method. `own` defaults to `false` and is passed through unchanged.
pointer_to_array(p, d::Integer, own=false) = pointer_to_array(p, (d,), own)
# One-dimensional `pointer_to_array`: asks the runtime (`jl_ptr_to_array_1d`)
# for a `Vector{T}` of `d` elements at `p`. `d` is passed as `Csize_t` and the
# `own` flag as `Cint`.
pointer_to_array{T}(p::Ptr{T}, d::Integer, own::Bool=false) =
    ccall(:jl_ptr_to_array_1d, Vector{T}, (Any, Ptr{T}, Csize_t, Cint),
          Vector{T}, p, d, own)
function pointer_to_array{T,N}(p::Ptr{T}, dims::NTuple{N,Int}, own::Bool=false)
ccall(:jl_ptr_to_array, Array{T,N}, (Any, Ptr{Void}, Any, Int32),
Array{T,N}, p, dims, own)
Expand Down
8 changes: 4 additions & 4 deletions base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@ string() = ""
# A string argument is returned unchanged.
string(s::AbstractString) = s

# With no arguments, `bytestring` yields the empty string literal.
bytestring() = ""
# Pre-change definition as shown in the diff; the same signature is defined
# again directly below. Forwards the vector's pointer and length to the
# pointer/length method.
bytestring(s::Vector{UInt8}) = bytestring(pointer(s),length(s))
# Build a `ByteString` from a byte vector by handing the vector and its length
# to the runtime's `jl_pchar_to_string`.
function bytestring(s::Vector{UInt8})
    nbytes = length(s)
    return ccall(:jl_pchar_to_string, Ref{ByteString}, (Ptr{UInt8}, Int), s, nbytes)
end

# Convert the NUL-terminated C string at `p` to a `ByteString`.
# Throws `ArgumentError` when `p` is NULL.
function bytestring(p::Union{Ptr{UInt8},Ptr{Int8}})
    # Reject NULL before the pointer reaches the runtime.
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
    return ccall(:jl_cstr_to_string, Ref{ByteString}, (Cstring,), p)
end
# A `Cstring` is converted to a `Ptr{UInt8}` and handled by the pointer method.
function bytestring(s::Cstring)
    return bytestring(convert(Ptr{UInt8}, s))
end

# Convert `len` bytes starting at `p` to a `ByteString`.
# Throws `ArgumentError` when `p` is NULL.
function bytestring(p::Union{Ptr{UInt8},Ptr{Int8}}, len::Integer)
    # Reject NULL before the pointer reaches the runtime.
    p == C_NULL && throw(ArgumentError("cannot convert NULL to string"))
    return ccall(:jl_pchar_to_string, Ref{ByteString}, (Ptr{UInt8}, Int), p, len)
end

Expand Down Expand Up @@ -269,4 +270,3 @@ function filter(f, s::AbstractString)
end
takebuf_string(out)
end

9 changes: 7 additions & 2 deletions base/unicode/utf8.jl
Original file line number Diff line number Diff line change
Expand Up @@ -355,5 +355,10 @@ function encode_to_utf8{T<:Union{UInt16, UInt32}}(::Type{T}, dat, len)
UTF8String(buf)
end

# Pre-change definitions as shown in the diff; both signatures are defined
# again directly below.
# NUL-terminated C string: goes through `bytestring(p)` and wraps the result
# in a `UTF8String`.
utf8(p::Ptr{UInt8}) = UTF8String(bytestring(p))
# Pointer plus explicit length: builds an array from the pointer with
# `pointer_to_array` and delegates to the array method of `utf8`.
utf8(p::Ptr{UInt8}, len::Integer) = utf8(pointer_to_array(p, len))
# Convert the NUL-terminated C string at `p` to a `UTF8String`.
# The length is measured with C `strlen`. A NULL `p` is never passed to
# `strlen`; it is forwarded with length 0 to the two-argument method instead.
function utf8(p::Ptr{UInt8})
    if p == C_NULL
        return utf8(p, Csize_t(0))
    end
    return utf8(p, ccall(:strlen, Csize_t, (Ptr{UInt8},), p))
end
# Convert `len` bytes starting at `p` to a `UTF8String`.
# Throws `ArgumentError` when `p` is NULL.
# NOTE(review): unlike the `ascii` counterpart, no validity check is applied to
# the bytes here — confirm that this is intended.
function utf8(p::Ptr{UInt8}, len::Integer)
    if p == C_NULL
        throw(ArgumentError("cannot convert NULL to string"))
    end
    # The runtime helper hands the bytes back as a `Vector{UInt8}`.
    bytes = ccall(:jl_pchar_to_array, Vector{UInt8}, (Ptr{UInt8}, Csize_t), p, len)
    return UTF8String(bytes)
end
2 changes: 1 addition & 1 deletion test/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,7 @@ let
@test aa == a
aa = pointer_to_array(pointer(a), UInt16(length(a)))
@test aa == a
@test_throws ErrorException pointer_to_array(pointer(a), -3)
@test_throws InexactError pointer_to_array(pointer(a), -3)
end

immutable FooBar
Expand Down
12 changes: 12 additions & 0 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ s = "abcde\uff\u2000\U1f596"
# `sp` points at the bytes of `s`, which the hunk header shows as "abcde"
# followed by non-ASCII characters.
sp = pointer(s)
@test utf8(sp) == s
@test utf8(sp,5) == "abcde"
# ASCII conversion must fail for the whole string ...
@test_throws ArgumentError ascii(sp)
# ... succeed for the 5-byte ASCII prefix ...
@test ascii(sp, 5) == "abcde"
# ... and fail again as soon as a sixth byte is included.
@test_throws ArgumentError ascii(sp, 6)
@test typeof(utf8(sp)) == UTF8String

@test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890)
Expand Down Expand Up @@ -496,3 +499,12 @@ foobaz(ch) = reinterpret(Char, typemax(UInt32))
@test utf8("a").*["b","c"] == ["ab","ac"]
@test "a".*map(utf8,["b","c"]) == ["ab","ac"]
@test ["a","b"].*["c","d"]' == ["ac" "ad"; "bc" "bd"]

# Make sure NULL pointers are handled consistently by
# `bytestring`, `ascii` and `utf8`
@test_throws ArgumentError bytestring(Ptr{UInt8}(0))
@test_throws ArgumentError bytestring(Ptr{UInt8}(0), 10)
@test_throws ArgumentError ascii(Ptr{UInt8}(0))
@test_throws ArgumentError ascii(Ptr{UInt8}(0), 10)
@test_throws ArgumentError utf8(Ptr{UInt8}(0))
@test_throws ArgumentError utf8(Ptr{UInt8}(0), 10)

0 comments on commit 732a416

Please sign in to comment.