diff --git a/src/arraytypes/views.jl b/src/arraytypes/views.jl index f13774b..0a43f6f 100644 --- a/src/arraytypes/views.jl +++ b/src/arraytypes/views.jl @@ -45,15 +45,33 @@ Base.size(l::View) = (l.ℓ,) if S <: Base.CodeUnits # BinaryView return !l.validity[i] ? missing : - v.length < 13 ? - Base.CodeUnits(StringView(@view l.inline[(((i - 1) * 16) + 5):(((i - 1) * 16) + 5 + v.length - 1)])) : - Base.CodeUnits(StringView(@view l.buffers[v.bufindex + 1][(v.offset + 1):(v.offset + v.length)])) + v.length < 13 ? + Base.CodeUnits( + StringView( + @view l.inline[(((i - 1) * 16) + 5):(((i - 1) * 16) + 5 + v.length - 1)] + ), + ) : + Base.CodeUnits( + StringView( + @view l.buffers[v.bufindex + 1][(v.offset + 1):(v.offset + v.length)] + ), + ) else # Utf8View return !l.validity[i] ? missing : - v.length < 13 ? - ArrowTypes.fromarrow(T, StringView(@view l.inline[(((i - 1) * 16) + 5):(((i - 1) * 16) + 5 + v.length - 1)])) : - ArrowTypes.fromarrow(T, StringView(@view l.buffers[v.bufindex + 1][(v.offset + 1):(v.offset + v.length)])) + v.length < 13 ? + ArrowTypes.fromarrow( + T, + StringView( + @view l.inline[(((i - 1) * 16) + 5):(((i - 1) * 16) + 5 + v.length - 1)] + ), + ) : + ArrowTypes.fromarrow( + T, + StringView( + @view l.buffers[v.bufindex + 1][(v.offset + 1):(v.offset + v.length)] + ), + ) end end diff --git a/src/eltypes.jl b/src/eltypes.jl index 3a681d0..52dbb80 100644 --- a/src/eltypes.jl +++ b/src/eltypes.jl @@ -129,12 +129,17 @@ function arrowtype(b, ::Type{T}) where {T<:AbstractFloat} return Meta.FloatingPoint, Meta.floatingPointEnd(b), nothing end -juliaeltype(f::Meta.Field, b::Union{Meta.Utf8,Meta.LargeUtf8,Meta.Utf8View}, convert) = String +juliaeltype(f::Meta.Field, b::Union{Meta.Utf8,Meta.LargeUtf8,Meta.Utf8View}, convert) = + String datasizeof(x) = sizeof(x) datasizeof(x::AbstractVector) = sum(datasizeof, x) -juliaeltype(f::Meta.Field, b::Union{Meta.Binary,Meta.LargeBinary,Meta.BinaryView}, convert) = Base.CodeUnits +juliaeltype( + f::Meta.Field, + b::Union{Meta.Binary,Meta.LargeBinary,Meta.BinaryView}, + convert, +) = Base.CodeUnits juliaeltype(f::Meta.Field, x::Meta.FixedSizeBinary, convert) = NTuple{Int(x.byteWidth),UInt8} @@ -428,7 +433,11 @@ ArrowTypes.JuliaType(::Val{PERIOD_SYMBOL}, ::Type{Duration{U}}) where {U} = peri ArrowTypes.fromarrow(::Type{P}, x::Duration{U}) where {P<:Dates.Period,U} = convert(P, x) # nested types; call juliaeltype recursively on nested children -function juliaeltype(f::Meta.Field, list::Union{Meta.List,Meta.LargeList,Meta.ListView,Meta.LargeListView}, convert) +function juliaeltype( + f::Meta.Field, + list::Union{Meta.List,Meta.LargeList,Meta.ListView,Meta.LargeListView}, + convert, +) return Vector{juliaeltype(f.children[1], buildmetadata(f.children[1]), convert)} end diff --git a/src/metadata/Message.jl b/src/metadata/Message.jl index b788314..0e49439 100644 --- a/src/metadata/Message.jl +++ b/src/metadata/Message.jl @@ -75,7 +75,8 @@ struct RecordBatch <: FlatBuffers.Table pos::Base.Int end -Base.propertynames(x::RecordBatch) = (:length, :nodes, :buffers, :compression, :variadicBufferCounts) +Base.propertynames(x::RecordBatch) = + (:length, :nodes, :buffers, :compression, :variadicBufferCounts) function Base.getproperty(x::RecordBatch, field::Symbol) if field === :length diff --git a/src/table.jl b/src/table.jl index 57a78c0..cdfdfb7 100644 --- a/src/table.jl +++ b/src/table.jl @@ -407,9 +407,10 @@ function DataAPI.colmetadatakeys(t::Table, col) end function DataAPI.colmetadatakeys(t::Table) - return (col => DataAPI.colmetadatakeys(t, col) for - col in Tables.columnnames(t) if - getmetadata(t[col]) !== nothing) + return ( + col => DataAPI.colmetadatakeys(t, col) for + col in Tables.columnnames(t) if getmetadata(t[col]) !== nothing + ) end Tables.istable(::Table) = true @@ -580,7 +581,8 @@ function Table(blobs::Vector{ArrowBlob}; convert::Bool=true) anyrecordbatches = true @debug "parsing record batch message: compression = $(header.compression)" @wkspawn begin - cols = collect(VectorIterator(sch, $batch, dictencodingslockable, convert)) + cols = + collect(VectorIterator(sch, $batch, dictencodingslockable, convert)) put!(() -> put!(tsks, cols), sync, $(rbi)) end rbi += 1 @@ -726,8 +728,17 @@ function build(field::Meta.Field, batch, rb, de, nodeidx, bufferidx, varbufferid nodeidx += 1 bufferidx += 1 else - A, nodeidx, bufferidx, varbufferidx = - build(field, field.type, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) + A, nodeidx, bufferidx, varbufferidx = build( + field, + field.type, + batch, + rb, + de, + nodeidx, + bufferidx, + varbufferidx, + convert, + ) end return A, nodeidx, bufferidx, varbufferidx end @@ -805,7 +816,17 @@ end const SubVector{T,P} = SubArray{T,1,P,Tuple{UnitRange{Int64}},true} -function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) +function build( + f::Meta.Field, + L::ListTypes, + batch, + rb, + de, + nodeidx, + bufferidx, + varbufferidx, + convert, +) @debug "building array: L = $L" validity = buildbitmap(batch, rb, nodeidx, bufferidx) bufferidx += 1 @@ -839,10 +860,23 @@ function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, v T = S == T ? ST : Union{Missing,ST} end end - return List{T,OT,typeof(A)}(bytes, validity, offsets, A, len, meta), nodeidx, bufferidx, varbufferidx + return List{T,OT,typeof(A)}(bytes, validity, offsets, A, len, meta), + nodeidx, + bufferidx, + varbufferidx end -function build(f::Meta.Field, L::ViewTypes, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) +function build( + f::Meta.Field, + L::ViewTypes, + batch, + rb, + de, + nodeidx, + bufferidx, + varbufferidx, + convert, +) @debug "building array: L = $L" validity = buildbitmap(batch, rb, nodeidx, bufferidx) bufferidx += 1 @@ -861,7 +895,10 @@ function build(f::Meta.Field, L::ViewTypes, batch, rb, de, nodeidx, bufferidx, v nodeidx += 1 meta = buildmetadata(f.custom_metadata) T = juliaeltype(f, meta, convert) - return View{T}(batch.bytes, validity, views, inline, buffers, len, meta), nodeidx, bufferidx, varbufferidx + return View{T}(batch.bytes, validity, views, inline, buffers, len, meta), + nodeidx, + bufferidx, + varbufferidx end function build( @@ -891,10 +928,23 @@ function build( end meta = buildmetadata(f.custom_metadata) T = juliaeltype(f, meta, convert) - return FixedSizeList{T,typeof(A)}(bytes, validity, A, len, meta), nodeidx, bufferidx, varbufferidx + return FixedSizeList{T,typeof(A)}(bytes, validity, A, len, meta), + nodeidx, + bufferidx, + varbufferidx end -function build(f::Meta.Field, L::Meta.Map, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) +function build( + f::Meta.Field, + L::Meta.Map, + batch, + rb, + de, + nodeidx, + bufferidx, + varbufferidx, + convert, +) @debug "building array: L = $L" validity = buildbitmap(batch, rb, nodeidx, bufferidx) bufferidx += 1 @@ -906,13 +956,27 @@ function build(f::Meta.Field, L::Meta.Map, batch, rb, de, nodeidx, bufferidx, va bufferidx += 1 len = rb.nodes[nodeidx].length nodeidx += 1 - A, nodeidx, bufferidx, varbufferidx = build(f.children[1], batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) + A, nodeidx, bufferidx, varbufferidx = + build(f.children[1], batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) meta = buildmetadata(f.custom_metadata) T = juliaeltype(f, meta, convert) - return Map{T,OT,typeof(A)}(validity, offsets, A, len, meta), nodeidx, bufferidx, varbufferidx + return Map{T,OT,typeof(A)}(validity, offsets, A, len, meta), + nodeidx, + bufferidx, + varbufferidx end -function build(f::Meta.Field, L::Meta.Struct, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) +function build( + f::Meta.Field, + L::Meta.Struct, + batch, + rb, + de, + nodeidx, + bufferidx, + varbufferidx, + convert, +) @debug "building array: L = $L" validity = buildbitmap(batch, rb, nodeidx, bufferidx) bufferidx += 1 @@ -920,17 +984,31 @@ function build(f::Meta.Field, L::Meta.Struct, batch, rb, de, nodeidx, bufferidx, vecs = [] nodeidx += 1 for child in f.children - A, nodeidx, bufferidx, varbufferidx = build(child, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) + A, nodeidx, bufferidx, varbufferidx = + build(child, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) push!(vecs, A) end data = Tuple(vecs) meta = buildmetadata(f.custom_metadata) T = juliaeltype(f, meta, convert) fnames = ntuple(i -> Symbol(f.children[i].name), length(f.children)) - return Struct{T,typeof(data),fnames}(validity, data, len, meta), nodeidx, bufferidx, varbufferidx + return Struct{T,typeof(data),fnames}(validity, data, len, meta), + nodeidx, + bufferidx, + varbufferidx end -function build(f::Meta.Field, L::Meta.Union, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) +function build( + f::Meta.Field, + L::Meta.Union, + batch, + rb, + de, + nodeidx, + bufferidx, + varbufferidx, + convert, +) @debug "building array: L = $L" buffer = rb.buffers[bufferidx] bytes, typeIds = reinterp(UInt8, batch, buffer, rb.compression) @@ -943,7 +1021,8 @@ function build(f::Meta.Field, L::Meta.Union, batch, rb, de, nodeidx, bufferidx, vecs = [] nodeidx += 1 for child in f.children - A, nodeidx, bufferidx, varbufferidx = build(child, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) + A, nodeidx, bufferidx, varbufferidx = + build(child, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) push!(vecs, A) end data = Tuple(vecs) @@ -958,18 +1037,38 @@ function build(f::Meta.Field, L::Meta.Union, batch, rb, de, nodeidx, bufferidx, return B, nodeidx, bufferidx, varbufferidx end -function build(f::Meta.Field, L::Meta.Null, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) +function build( + f::Meta.Field, + L::Meta.Null, + batch, + rb, + de, + nodeidx, + bufferidx, + varbufferidx, + convert, +) @debug "building array: L = $L" meta = buildmetadata(f.custom_metadata) T = juliaeltype(f, meta, convert) return NullVector{maybemissing(T)}(MissingVector(rb.nodes[nodeidx].length), meta), - nodeidx + 1, - bufferidx, - varbufferidx + nodeidx + 1, + bufferidx, + varbufferidx end # primitives -function build(f::Meta.Field, ::L, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) where {L} +function build( + f::Meta.Field, + ::L, + batch, + rb, + de, + nodeidx, + bufferidx, + varbufferidx, + convert, +) where {L} @debug "building array: L = $L" validity = buildbitmap(batch, rb, nodeidx, bufferidx) bufferidx += 1 @@ -982,10 +1081,23 @@ function build(f::Meta.Field, ::L, batch, rb, de, nodeidx, bufferidx, varbufferi len = rb.nodes[nodeidx].length T = juliaeltype(f, meta, convert) @debug "final julia type for primitive: T = $T" - return Primitive(T, bytes, validity, A, len, meta), nodeidx + 1, bufferidx + 1, varbufferidx + return Primitive(T, bytes, validity, A, len, meta), + nodeidx + 1, + bufferidx + 1, + varbufferidx end -function build(f::Meta.Field, L::Meta.Bool, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) +function build( + f::Meta.Field, + L::Meta.Bool, + batch, + rb, + de, + nodeidx, + bufferidx, + varbufferidx, + convert, +) @debug "building array: L = $L" validity = buildbitmap(batch, rb, nodeidx, bufferidx) bufferidx += 1 @@ -1010,5 +1122,8 @@ function build(f::Meta.Field, L::Meta.Bool, batch, rb, de, nodeidx, bufferidx, v end len = rb.nodes[nodeidx].length T = juliaeltype(f, meta, convert) - return BoolVector{T}(decodedbytes, pos, validity, len, meta), nodeidx + 1, bufferidx + 1, varbufferidx + return BoolVector{T}(decodedbytes, pos, validity, len, meta), + nodeidx + 1, + bufferidx + 1, + varbufferidx end diff --git a/test/runtests.jl b/test/runtests.jl index 20bdffb..d720fa0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1073,7 +1073,7 @@ end @testset "DataAPI.metadata" begin df = DataFrame(a=1, b=2, c=3) - for i in 1:2 + for i = 1:2 io = IOBuffer() if i == 1 # skip writing metadata in the first iteration Arrow.write(io, df) @@ -1099,7 +1099,8 @@ end @test colmetadata(tbl, :b) == colmetadata(df, :b) @test_throws MethodError colmetadata(tbl, :b, "xyz") @test colmetadata(tbl, :b, "xyz", "something") == "something" - @test colmetadata(tbl, :b, "xyz", "something"; style=true) == ("something", :default) + @test colmetadata(tbl, :b, "xyz", "something"; style=true) == + ("something", :default) @test Set(colmetadatakeys(tbl)) == Set(colmetadatakeys(df)) # add metadata for the second iteration