Skip to content

Commit

Permalink
Fix formatting of various files
Browse files Browse the repository at this point in the history
  • Loading branch information
quinnj committed Dec 3, 2024
1 parent ee00724 commit 0d3c98c
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 39 deletions.
30 changes: 24 additions & 6 deletions src/arraytypes/views.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,33 @@ Base.size(l::View) = (l.ℓ,)
if S <: Base.CodeUnits
# BinaryView
return !l.validity[i] ? missing :
v.length < 13 ?
Base.CodeUnits(StringView(@view l.inline[(((i - 1) * 16) + 5):(((i - 1) * 16) + 5 + v.length - 1)])) :
Base.CodeUnits(StringView(@view l.buffers[v.bufindex + 1][(v.offset + 1):(v.offset + v.length)]))
v.length < 13 ?
Base.CodeUnits(
StringView(
@view l.inline[(((i - 1) * 16) + 5):(((i - 1) * 16) + 5 + v.length - 1)]
),
) :
Base.CodeUnits(
StringView(
@view l.buffers[v.bufindex + 1][(v.offset + 1):(v.offset + v.length)]
),
)
else
# Utf8View
return !l.validity[i] ? missing :
v.length < 13 ?
ArrowTypes.fromarrow(T, StringView(@view l.inline[(((i - 1) * 16) + 5):(((i - 1) * 16) + 5 + v.length - 1)])) :
ArrowTypes.fromarrow(T, StringView(@view l.buffers[v.bufindex + 1][(v.offset + 1):(v.offset + v.length)]))
v.length < 13 ?
ArrowTypes.fromarrow(
T,
StringView(
@view l.inline[(((i - 1) * 16) + 5):(((i - 1) * 16) + 5 + v.length - 1)]
),
) :
ArrowTypes.fromarrow(
T,
StringView(
@view l.buffers[v.bufindex + 1][(v.offset + 1):(v.offset + v.length)]
),
)
end
end

Expand Down
15 changes: 12 additions & 3 deletions src/eltypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,17 @@ function arrowtype(b, ::Type{T}) where {T<:AbstractFloat}
return Meta.FloatingPoint, Meta.floatingPointEnd(b), nothing
end

juliaeltype(f::Meta.Field, b::Union{Meta.Utf8,Meta.LargeUtf8,Meta.Utf8View}, convert) = String
juliaeltype(f::Meta.Field, b::Union{Meta.Utf8,Meta.LargeUtf8,Meta.Utf8View}, convert) =
String

# Number of bytes of data held by `x`; non-vector values defer to `sizeof`.
datasizeof(x) = sizeof(x)
# For vectors, recursively total the data size of every element. The explicit
# `init=0` makes an empty vector report 0 bytes instead of throwing an
# empty-reduction error when the element type gives Base no known zero.
datasizeof(x::AbstractVector) = mapreduce(datasizeof, +, x; init=0)

juliaeltype(f::Meta.Field, b::Union{Meta.Binary,Meta.LargeBinary,Meta.BinaryView}, convert) = Base.CodeUnits
juliaeltype(
f::Meta.Field,
b::Union{Meta.Binary,Meta.LargeBinary,Meta.BinaryView},
convert,
) = Base.CodeUnits

# A fixed-size binary field maps to a tuple of `x.byteWidth` raw bytes.
function juliaeltype(f::Meta.Field, x::Meta.FixedSizeBinary, convert)
    width = Int(x.byteWidth)
    return NTuple{width,UInt8}
end
Expand Down Expand Up @@ -428,7 +433,11 @@ ArrowTypes.JuliaType(::Val{PERIOD_SYMBOL}, ::Type{Duration{U}}) where {U} = peri
# Turn a `Duration{U}` into the requested `Dates.Period` subtype `P` via `convert`.
function ArrowTypes.fromarrow(::Type{P}, x::Duration{U}) where {P<:Dates.Period,U}
    return convert(P, x)
end

# nested types; call juliaeltype recursively on nested children
function juliaeltype(f::Meta.Field, list::Union{Meta.List,Meta.LargeList,Meta.ListView,Meta.LargeListView}, convert)
function juliaeltype(
f::Meta.Field,
list::Union{Meta.List,Meta.LargeList,Meta.ListView,Meta.LargeListView},
convert,
)
return Vector{juliaeltype(f.children[1], buildmetadata(f.children[1]), convert)}
end

Expand Down
3 changes: 2 additions & 1 deletion src/metadata/Message.jl
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ struct RecordBatch <: FlatBuffers.Table
pos::Base.Int
end

Base.propertynames(x::RecordBatch) = (:length, :nodes, :buffers, :compression, :variadicBufferCounts)
Base.propertynames(x::RecordBatch) =
(:length, :nodes, :buffers, :compression, :variadicBufferCounts)

function Base.getproperty(x::RecordBatch, field::Symbol)
if field === :length
Expand Down
169 changes: 142 additions & 27 deletions src/table.jl
Original file line number Diff line number Diff line change
Expand Up @@ -407,9 +407,10 @@ function DataAPI.colmetadatakeys(t::Table, col)
end

function DataAPI.colmetadatakeys(t::Table)
return (col => DataAPI.colmetadatakeys(t, col) for
col in Tables.columnnames(t) if
getmetadata(t[col]) !== nothing)
return (
col => DataAPI.colmetadatakeys(t, col) for
col in Tables.columnnames(t) if getmetadata(t[col]) !== nothing
)
end

Tables.istable(::Table) = true
Expand Down Expand Up @@ -580,7 +581,8 @@ function Table(blobs::Vector{ArrowBlob}; convert::Bool=true)
anyrecordbatches = true
@debug "parsing record batch message: compression = $(header.compression)"
@wkspawn begin
cols = collect(VectorIterator(sch, $batch, dictencodingslockable, convert))
cols =
collect(VectorIterator(sch, $batch, dictencodingslockable, convert))
put!(() -> put!(tsks, cols), sync, $(rbi))
end
rbi += 1
Expand Down Expand Up @@ -726,8 +728,17 @@ function build(field::Meta.Field, batch, rb, de, nodeidx, bufferidx, varbufferid
nodeidx += 1
bufferidx += 1
else
A, nodeidx, bufferidx, varbufferidx =
build(field, field.type, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
A, nodeidx, bufferidx, varbufferidx = build(
field,
field.type,
batch,
rb,
de,
nodeidx,
bufferidx,
varbufferidx,
convert,
)
end
return A, nodeidx, bufferidx, varbufferidx
end
Expand Down Expand Up @@ -805,7 +816,17 @@ end

# Alias for a one-dimensional `SubArray` with element type `T` over parent type `P`,
# indexed by a single `UnitRange{Int64}`; the trailing `true` is SubArray's
# fast-linear-indexing type parameter.
const SubVector{T,P} = SubArray{T,1,P,Tuple{UnitRange{Int64}},true}

function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
function build(
f::Meta.Field,
L::ListTypes,
batch,
rb,
de,
nodeidx,
bufferidx,
varbufferidx,
convert,
)
@debug "building array: L = $L"
validity = buildbitmap(batch, rb, nodeidx, bufferidx)
bufferidx += 1
Expand Down Expand Up @@ -839,10 +860,23 @@ function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, v
T = S == T ? ST : Union{Missing,ST}
end
end
return List{T,OT,typeof(A)}(bytes, validity, offsets, A, len, meta), nodeidx, bufferidx, varbufferidx
return List{T,OT,typeof(A)}(bytes, validity, offsets, A, len, meta),
nodeidx,
bufferidx,
varbufferidx
end

function build(f::Meta.Field, L::ViewTypes, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
function build(
f::Meta.Field,
L::ViewTypes,
batch,
rb,
de,
nodeidx,
bufferidx,
varbufferidx,
convert,
)
@debug "building array: L = $L"
validity = buildbitmap(batch, rb, nodeidx, bufferidx)
bufferidx += 1
Expand All @@ -861,7 +895,10 @@ function build(f::Meta.Field, L::ViewTypes, batch, rb, de, nodeidx, bufferidx, v
nodeidx += 1
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
return View{T}(batch.bytes, validity, views, inline, buffers, len, meta), nodeidx, bufferidx, varbufferidx
return View{T}(batch.bytes, validity, views, inline, buffers, len, meta),
nodeidx,
bufferidx,
varbufferidx
end

function build(
Expand Down Expand Up @@ -891,10 +928,23 @@ function build(
end
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
return FixedSizeList{T,typeof(A)}(bytes, validity, A, len, meta), nodeidx, bufferidx, varbufferidx
return FixedSizeList{T,typeof(A)}(bytes, validity, A, len, meta),
nodeidx,
bufferidx,
varbufferidx
end

function build(f::Meta.Field, L::Meta.Map, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
function build(
f::Meta.Field,
L::Meta.Map,
batch,
rb,
de,
nodeidx,
bufferidx,
varbufferidx,
convert,
)
@debug "building array: L = $L"
validity = buildbitmap(batch, rb, nodeidx, bufferidx)
bufferidx += 1
Expand All @@ -906,31 +956,59 @@ function build(f::Meta.Field, L::Meta.Map, batch, rb, de, nodeidx, bufferidx, va
bufferidx += 1
len = rb.nodes[nodeidx].length
nodeidx += 1
A, nodeidx, bufferidx, varbufferidx = build(f.children[1], batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
A, nodeidx, bufferidx, varbufferidx =
build(f.children[1], batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
return Map{T,OT,typeof(A)}(validity, offsets, A, len, meta), nodeidx, bufferidx, varbufferidx
return Map{T,OT,typeof(A)}(validity, offsets, A, len, meta),
nodeidx,
bufferidx,
varbufferidx
end

function build(f::Meta.Field, L::Meta.Struct, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
function build(
f::Meta.Field,
L::Meta.Struct,
batch,
rb,
de,
nodeidx,
bufferidx,
varbufferidx,
convert,
)
@debug "building array: L = $L"
validity = buildbitmap(batch, rb, nodeidx, bufferidx)
bufferidx += 1
len = rb.nodes[nodeidx].length
vecs = []
nodeidx += 1
for child in f.children
A, nodeidx, bufferidx, varbufferidx = build(child, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
A, nodeidx, bufferidx, varbufferidx =
build(child, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
push!(vecs, A)
end
data = Tuple(vecs)
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
fnames = ntuple(i -> Symbol(f.children[i].name), length(f.children))
return Struct{T,typeof(data),fnames}(validity, data, len, meta), nodeidx, bufferidx, varbufferidx
return Struct{T,typeof(data),fnames}(validity, data, len, meta),
nodeidx,
bufferidx,
varbufferidx
end

function build(f::Meta.Field, L::Meta.Union, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
function build(
f::Meta.Field,
L::Meta.Union,
batch,
rb,
de,
nodeidx,
bufferidx,
varbufferidx,
convert,
)
@debug "building array: L = $L"
buffer = rb.buffers[bufferidx]
bytes, typeIds = reinterp(UInt8, batch, buffer, rb.compression)
Expand All @@ -943,7 +1021,8 @@ function build(f::Meta.Field, L::Meta.Union, batch, rb, de, nodeidx, bufferidx,
vecs = []
nodeidx += 1
for child in f.children
A, nodeidx, bufferidx, varbufferidx = build(child, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
A, nodeidx, bufferidx, varbufferidx =
build(child, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
push!(vecs, A)
end
data = Tuple(vecs)
Expand All @@ -958,18 +1037,38 @@ function build(f::Meta.Field, L::Meta.Union, batch, rb, de, nodeidx, bufferidx,
return B, nodeidx, bufferidx, varbufferidx
end

function build(f::Meta.Field, L::Meta.Null, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
function build(
f::Meta.Field,
L::Meta.Null,
batch,
rb,
de,
nodeidx,
bufferidx,
varbufferidx,
convert,
)
@debug "building array: L = $L"
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
return NullVector{maybemissing(T)}(MissingVector(rb.nodes[nodeidx].length), meta),
nodeidx + 1,
bufferidx,
varbufferidx
nodeidx + 1,
bufferidx,
varbufferidx
end

# primitives
function build(f::Meta.Field, ::L, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert) where {L}
function build(
f::Meta.Field,
::L,
batch,
rb,
de,
nodeidx,
bufferidx,
varbufferidx,
convert,
) where {L}
@debug "building array: L = $L"
validity = buildbitmap(batch, rb, nodeidx, bufferidx)
bufferidx += 1
Expand All @@ -982,10 +1081,23 @@ function build(f::Meta.Field, ::L, batch, rb, de, nodeidx, bufferidx, varbufferi
len = rb.nodes[nodeidx].length
T = juliaeltype(f, meta, convert)
@debug "final julia type for primitive: T = $T"
return Primitive(T, bytes, validity, A, len, meta), nodeidx + 1, bufferidx + 1, varbufferidx
return Primitive(T, bytes, validity, A, len, meta),
nodeidx + 1,
bufferidx + 1,
varbufferidx
end

function build(f::Meta.Field, L::Meta.Bool, batch, rb, de, nodeidx, bufferidx, varbufferidx, convert)
function build(
f::Meta.Field,
L::Meta.Bool,
batch,
rb,
de,
nodeidx,
bufferidx,
varbufferidx,
convert,
)
@debug "building array: L = $L"
validity = buildbitmap(batch, rb, nodeidx, bufferidx)
bufferidx += 1
Expand All @@ -1010,5 +1122,8 @@ function build(f::Meta.Field, L::Meta.Bool, batch, rb, de, nodeidx, bufferidx, v
end
len = rb.nodes[nodeidx].length
T = juliaeltype(f, meta, convert)
return BoolVector{T}(decodedbytes, pos, validity, len, meta), nodeidx + 1, bufferidx + 1, varbufferidx
return BoolVector{T}(decodedbytes, pos, validity, len, meta),
nodeidx + 1,
bufferidx + 1,
varbufferidx
end
5 changes: 3 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,7 @@ end

@testset "DataAPI.metadata" begin
df = DataFrame(a=1, b=2, c=3)
for i in 1:2
for i = 1:2
io = IOBuffer()
if i == 1 # skip writing metadata in the first iteration
Arrow.write(io, df)
Expand All @@ -1099,7 +1099,8 @@ end
@test colmetadata(tbl, :b) == colmetadata(df, :b)
@test_throws MethodError colmetadata(tbl, :b, "xyz")
@test colmetadata(tbl, :b, "xyz", "something") == "something"
@test colmetadata(tbl, :b, "xyz", "something"; style=true) == ("something", :default)
@test colmetadata(tbl, :b, "xyz", "something"; style=true) ==
("something", :default)
@test Set(colmetadatakeys(tbl)) == Set(colmetadatakeys(df))

# add metadata for the second iteration
Expand Down

0 comments on commit 0d3c98c

Please sign in to comment.