From 19b041abc4948f5a52e30aef81c2816134753c19 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Tue, 5 Apr 2016 15:40:27 -0400 Subject: [PATCH] Fix `ascii` and `utf8` Make conversion from c pointer to `ByteString`, `ASCIIString` and `UTF8String` more efficient and consistent. --- base/ascii.jl | 11 +++++++++-- base/pointer.jl | 5 ++++- base/strings/basic.jl | 8 ++++---- base/unicode/utf8.jl | 9 +++++++-- test/core.jl | 2 +- test/strings/basic.jl | 12 ++++++++++++ 6 files changed, 37 insertions(+), 10 deletions(-) diff --git a/base/ascii.jl b/base/ascii.jl index 40cde5ae6f4108..21c3e2a337c7dc 100644 --- a/base/ascii.jl +++ b/base/ascii.jl @@ -108,8 +108,15 @@ convert(::Type{ASCIIString}, a::Vector{UInt8}) = begin return ASCIIString(a) end -ascii(p::Ptr{UInt8}) = ASCIIString(bytestring(p)) -ascii(p::Ptr{UInt8}, len::Integer) = ascii(pointer_to_array(p, len)) +ascii(p::Ptr{UInt8}) = + ascii(p, p == C_NULL ? Csize_t(0) : ccall(:strlen, Csize_t, (Ptr{UInt8},), p)) +function ascii(p::Ptr{UInt8}, len::Integer) + p == C_NULL && throw(ArgumentError("cannot convert NULL to string")) + ary = ccall(:jl_pchar_to_array, Vector{UInt8}, + (Ptr{UInt8}, Csize_t), p, len) + isvalid(ASCIIString, ary) || throw(ArgumentError("invalid ASCII sequence")) + ASCIIString(ary) +end function convert(::Type{ASCIIString}, a::Array{UInt8,1}, invalids_as::ASCIIString) l = length(a) diff --git a/base/pointer.jl b/base/pointer.jl index 2e8959e6ff9e4d..d8e2a82c03a7a2 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -29,7 +29,10 @@ unsafe_convert{T}(::Type{Ptr{T}}, a::Array{T}) = ccall(:jl_array_ptr, Ptr{T}, (A unsafe_convert(::Type{Ptr{Void}}, a::Array) = ccall(:jl_array_ptr, Ptr{Void}, (Any,), a) # unsafe pointer to array conversions -pointer_to_array(p, d::Integer, own=false) = pointer_to_array(p, (d,), own) +function pointer_to_array{T}(p::Ptr{T}, d::Integer, own::Bool=false) + ccall(:jl_ptr_to_array_1d, Vector{T}, + (Any, Ptr{Void}, Csize_t, Cint), Array{T,1}, p, d, own) +end function 
pointer_to_array{T,N}(p::Ptr{T}, dims::NTuple{N,Int}, own::Bool=false) ccall(:jl_ptr_to_array, Array{T,N}, (Any, Ptr{Void}, Any, Int32), Array{T,N}, p, dims, own) diff --git a/base/strings/basic.jl b/base/strings/basic.jl index ad0ee77abb914a..9bb7dbfa13a160 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -11,16 +11,17 @@ string() = "" string(s::AbstractString) = s bytestring() = "" -bytestring(s::Vector{UInt8}) = bytestring(pointer(s),length(s)) +bytestring(s::Vector{UInt8}) = + ccall(:jl_pchar_to_string, Ref{ByteString}, (Ptr{UInt8},Int), s, length(s)) function bytestring(p::Union{Ptr{UInt8},Ptr{Int8}}) - p == C_NULL ? throw(ArgumentError("cannot convert NULL to string")) : + p == C_NULL && throw(ArgumentError("cannot convert NULL to string")) ccall(:jl_cstr_to_string, Ref{ByteString}, (Cstring,), p) end bytestring(s::Cstring) = bytestring(convert(Ptr{UInt8}, s)) function bytestring(p::Union{Ptr{UInt8},Ptr{Int8}},len::Integer) - p == C_NULL ? throw(ArgumentError("cannot convert NULL to string")) : + p == C_NULL && throw(ArgumentError("cannot convert NULL to string")) ccall(:jl_pchar_to_string, Ref{ByteString}, (Ptr{UInt8},Int), p, len) end @@ -269,4 +270,3 @@ function filter(f, s::AbstractString) end takebuf_string(out) end - diff --git a/base/unicode/utf8.jl b/base/unicode/utf8.jl index 5f278c0e18b4b1..75e4c0094da0e7 100644 --- a/base/unicode/utf8.jl +++ b/base/unicode/utf8.jl @@ -355,5 +355,10 @@ function encode_to_utf8{T<:Union{UInt16, UInt32}}(::Type{T}, dat, len) UTF8String(buf) end -utf8(p::Ptr{UInt8}) = UTF8String(bytestring(p)) -utf8(p::Ptr{UInt8}, len::Integer) = utf8(pointer_to_array(p, len)) +utf8(p::Ptr{UInt8}) = + utf8(p, p == C_NULL ? 
Csize_t(0) : ccall(:strlen, Csize_t, (Ptr{UInt8},), p)) +function utf8(p::Ptr{UInt8}, len::Integer) + p == C_NULL && throw(ArgumentError("cannot convert NULL to string")) + UTF8String(ccall(:jl_pchar_to_array, Vector{UInt8}, + (Ptr{UInt8}, Csize_t), p, len)) +end diff --git a/test/core.jl b/test/core.jl index 5fddad873aa55a..774a81985af7b8 100644 --- a/test/core.jl +++ b/test/core.jl @@ -911,7 +911,7 @@ let @test aa == a aa = pointer_to_array(pointer(a), UInt16(length(a))) @test aa == a - @test_throws ErrorException pointer_to_array(pointer(a), -3) + @test_throws InexactError pointer_to_array(pointer(a), -3) end immutable FooBar diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 829e69520cb648..b0258852d45fe1 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -227,6 +227,9 @@ s = "abcde\uff\u2000\U1f596" sp = pointer(s) @test utf8(sp) == s @test utf8(sp,5) == "abcde" +@test_throws ArgumentError ascii(sp) +@test ascii(sp, 5) == "abcde" +@test_throws ArgumentError ascii(sp, 6) @test typeof(utf8(sp)) == UTF8String @test get(tryparse(BigInt, "1234567890")) == BigInt(1234567890) @@ -496,3 +499,12 @@ foobaz(ch) = reinterpret(Char, typemax(UInt32)) @test utf8("a").*["b","c"] == ["ab","ac"] @test "a".*map(utf8,["b","c"]) == ["ab","ac"] @test ["a","b"].*["c","d"]' == ["ac" "ad"; "bc" "bd"] + +# Make sure NULL pointers are handled consistently by +# `bytestring`, `ascii` and `utf8` +@test_throws ArgumentError bytestring(Ptr{UInt8}(0)) +@test_throws ArgumentError bytestring(Ptr{UInt8}(0), 10) +@test_throws ArgumentError ascii(Ptr{UInt8}(0)) +@test_throws ArgumentError ascii(Ptr{UInt8}(0), 10) +@test_throws ArgumentError utf8(Ptr{UInt8}(0)) +@test_throws ArgumentError utf8(Ptr{UInt8}(0), 10)