From 4c0418b571aa3297ed94de19458ba27c120bee91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 20 Apr 2020 15:29:32 +0200 Subject: [PATCH 1/7] mostly fix circular reference bug in show --- src/abstractdataframe/io.jl | 54 +++++++--- src/abstractdataframe/show.jl | 181 +++++++++++++++------------------- 2 files changed, 120 insertions(+), 115 deletions(-) diff --git a/src/abstractdataframe/io.jl b/src/abstractdataframe/io.jl index a4b5e4cea6..ac67bc0bfd 100644 --- a/src/abstractdataframe/io.jl +++ b/src/abstractdataframe/io.jl @@ -6,7 +6,8 @@ Render a data frame to an I/O stream in MIME type `mime`. # Arguments - `io::IO`: The I/O stream to which `df` will be printed. - `mime::MIME`: supported MIME types are: `"text/plain"`, `"text/html"`, `"text/latex"`, - `"text/csv"`, `"text/tab-separated-values"` + `"text/csv"`, `"text/tab-separated-values"` (the last two MIME types do not support + showing `#undef` values) - `df::AbstractDataFrame`: The data frame to print. Additionally selected MIME types support passing the following keyword arguments: @@ -134,11 +135,23 @@ function _show(io::IO, ::MIME"text/html", df::AbstractDataFrame; end for column_name in cnames if isassigned(df[!, column_name], row) - cell = sprint(ourshow, df[row, column_name]) + cell_val = df[row, column_name] + if ismissing(cell_val) + write(io, "missing") + elseif isnothing(cell_val) + write(io, "nothing") + elseif cell_val isa SHOW_TABULAR_TYPES + write(io, "") + cell = sprint(ourshow, cell_val) + write(io, html_escape(cell)) + write(io, "") + else + cell = sprint(ourshow, cell_val) + write(io, "$(html_escape(cell))") + end else - cell = sprint(ourshow, Base.undef_ref_str) + write(io, "#undef") end - write(io, "$(html_escape(cell))") end write(io, "") end @@ -276,12 +289,24 @@ function _show(io::IO, ::MIME"text/latex", df::AbstractDataFrame; write(io, @sprintf("%d", rowid === nothing ? row : rowid)) for col in 1:mxcol write(io, " & ") - cell = isassigned(df[!, col], row) ? df[row,col] : Base.undef_ref_str - if !ismissing(cell) - if showable(MIME("text/latex"), cell) - show(io, MIME("text/latex"), cell) - else + if !isassigned(df[!, col], row) + print(io, "\\emph{\\#undef}") + else + cell = df[row,col] + if ismissing(cell) + print(io, "\\emph{missing}") + elseif isnothing(cell) + print(io, "\\emph{nothing}") + elseif cell isa SHOW_TABULAR_TYPES + print(io, "\\emph{") print(io, latex_escape(sprint(ourshow, cell, context=io))) + print(io, "}") + else + if showable(MIME("text/latex"), cell) + show(io, MIME("text/latex"), cell) + else + print(io, latex_escape(sprint(ourshow, cell, context=io))) + end end end end @@ -360,7 +385,8 @@ function printtable(io::IO, header::Bool = true, separator::Char = ',', quotemark::Char = '"', - missingstring::AbstractString = "missing") + missingstring::AbstractString = "missing", + nothingstring::AbstractString = "nothing") _check_consistency(df) n, p = size(df) etypes = eltype.(eachcol(df)) @@ -380,7 +406,11 @@ function printtable(io::IO, quotestr = string(quotemark) for i in 1:n for j in 1:p - if !ismissing(df[i, j]) + if ismissing(df[i, j]) + print(io, missingstring) + elseif isnothing(df[i, j]) + print(io, nothingstring) + else if ! (etypes[j] <: Real) print(io, quotemark) escapedprint(io, df[i, j], quotestr) @@ -388,8 +418,6 @@ function printtable(io::IO, else print(io, df[i, j]) end - else - print(io, missingstring) end if j < p print(io, separator) diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index 78c78d08f1..b53673d57c 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -2,20 +2,15 @@ Base.summary(df::AbstractDataFrame) = @sprintf("%d×%d %s", size(df)..., typeof(df).name) Base.summary(io::IO, df::AbstractDataFrame) = print(io, summary(df)) -let - local buffer = IOBuffer(Vector{UInt8}(undef, 80), read=true, write=true) - global ourstrwidth - - """ - DataFrames.ourstrwidth(io::IO, x::Any) - - Determine the number of characters that would be used to print a value. - """ - function ourstrwidth(io::IO, x::Any) - truncate(buffer, 0) - ourshow(IOContext(buffer, :compact=>get(io, :compact, true)), x) - textwidth(String(take!(buffer))) - end +""" + DataFrames.ourstrwidth(io::IO, x::Any, buffer) + +Determine the number of characters that would be used to print a value. +""" +function ourstrwidth(io::IO, x::Any, buffer::Base.GenericIOBuffer{Array{UInt8,1}}) + truncate(buffer, 0) + ourshow(IOContext(buffer, :compact=>get(io, :compact, true)), x) + textwidth(String(take!(buffer))) end """ @@ -25,25 +20,40 @@ Render a value to an `IO` object compactly and omitting type information, by calling 3-argument `show`, or 2-argument `show` if the former contains line breaks. Unlike `show`, render strings without surrounding quote marks. """ -function ourshow(io::IO, x::Any) - io = IOContext(io, :compact=>get(io, :compact, true), :typeinfo=>typeof(x)) +function ourshow(io::IO, x::Any; styled::Bool=false) + io_ctx = IOContext(io, :compact=>get(io, :compact, true), :typeinfo=>typeof(x)) # This mirrors the behavior of Base.print_matrix_row # First try 3-arg show - sx = sprint(show, "text/plain", x, context=io) + sx = sprint(show, "text/plain", x, context=io_ctx) # If the output contains line breaks, try 2-arg show instead. if occursin('\n', sx) - sx = sprint(show, x, context=io) + sx = sprint(show, x, context=io_ctx) end - print(io, sx) + # + if x isa AbstractString + @assert sx[1] == sx[end] == '"' + sx = escape_string(chop(sx, head=1, tail=1), "") + end + + if styled + printstyled(io_ctx, sx, color=:light_black) + else + print(io_ctx, sx) + end end +const SHOW_TABULAR_TYPES = Union{AbstractDataFrame, DataFrameRow, DataFrameRows, + DataFrameColumns, GroupedDataFrame} + ourshow(io::IO, x::AbstractString) = escape_string(io, x, "") ourshow(io::IO, x::CategoricalValue{<:AbstractString}) = escape_string(io, get(x), "") ourshow(io::IO, x::Symbol) = ourshow(io, string(x)) -ourshow(io::IO, x::Nothing) = nothing +ourshow(io::IO, x::Nothing; styled::Bool=false) = ourshow(io, "nothing", styled=styled) +ourshow(io::IO, x::SHOW_TABULAR_TYPES; styled::Bool=false) = + ourshow(io, summary(x), styled=styled) # AbstractChar: https://github.com/JuliaLang/julia/pull/34730 (1.5.0-DEV.261) # Irrational: https://github.com/JuliaLang/julia/pull/34741 (1.5.0-DEV.266) @@ -112,8 +122,9 @@ end rowindices1::AbstractVector{Int}, rowindices2::AbstractVector{Int}, rowlabel::Symbol, - rowid=nothing, - show_eltype::Bool=true) + rowid, + show_eltype::Bool, + buffer) Calculate, for each column of an AbstractDataFrame, the maximum string width used to render the name of that column, its type, and the @@ -140,19 +151,8 @@ implicit row ID column contained in every `AbstractDataFrame`. numeric ID's of each row. Typically, this will be set to "Row". - `rowid`: Used to handle showing `DataFrameRow`. - `show_eltype`: Whether to print the column type - under the column name in the heading. Defaults to `true`. - -# Examples -```jldoctest -julia> using DataFrames - -julia> df = DataFrame(A = 1:3, B = ["x", "yy", "z"]); - -julia> DataFrames.getmaxwidths(df, stdout, 1:1, 3:3, :Row) -3-element Array{Int64,1}: - 1 - 1 - 3 + under the column name in the heading. +- `buffer`: buffer passed around to avoid reallocations in `ourstrwidth` ``` """ function getmaxwidths(df::AbstractDataFrame, @@ -160,27 +160,28 @@ function getmaxwidths(df::AbstractDataFrame, rowindices1::AbstractVector{Int}, rowindices2::AbstractVector{Int}, rowlabel::Symbol, - rowid=nothing, - show_eltype::Bool=true) + rowid, + show_eltype::Bool, + buffer) maxwidths = Vector{Int}(undef, size(df, 2) + 1) - undefstrwidth = ourstrwidth(io, Base.undef_ref_str) + undefstrwidth = ourstrwidth(io, Base.undef_ref_str, buffer) j = 1 for (name, col) in pairs(eachcol(df)) # (1) Consider length of column name - maxwidth = ourstrwidth(io, name) + maxwidth = ourstrwidth(io, name, buffer) # (2) Consider length of longest entry in that column for indices in (rowindices1, rowindices2), i in indices if isassigned(col, i) - maxwidth = max(maxwidth, ourstrwidth(io, col[i])) + maxwidth = max(maxwidth, ourstrwidth(io, col[i], buffer)) else maxwidth = max(maxwidth, undefstrwidth) end end if show_eltype - maxwidths[j] = max(maxwidth, ourstrwidth(io, compacttype(eltype(col)))) + maxwidths[j] = max(maxwidth, ourstrwidth(io, compacttype(eltype(col)), buffer)) else maxwidths[j] = maxwidth end @@ -190,9 +191,9 @@ function getmaxwidths(df::AbstractDataFrame, if rowid isa Nothing rowmaxwidth1 = isempty(rowindices1) ? 0 : ndigits(maximum(rowindices1)) rowmaxwidth2 = isempty(rowindices2) ? 0 : ndigits(maximum(rowindices2)) - maxwidths[j] = max(max(rowmaxwidth1, rowmaxwidth2), ourstrwidth(io, rowlabel)) + maxwidths[j] = max(max(rowmaxwidth1, rowmaxwidth2), ourstrwidth(io, rowlabel, buffer)) else - maxwidths[j] = max(ndigits(rowid), ourstrwidth(io, rowlabel)) + maxwidths[j] = max(ndigits(rowid), ourstrwidth(io, rowlabel, buffer)) end return maxwidths @@ -211,21 +212,6 @@ pretty print the `AbstractDataFrame`. # Arguments - `maxwidths::Vector{Int}`: The maximum width needed to render each column of an `AbstractDataFrame`. - -# Examples -```jldoctest -julia> using DataFrames - -julia> df = DataFrame(A = 1:3, B = ["x", "yy", "z"]); - -julia> maxwidths = DataFrames.getmaxwidths(df, stdout, 1:1, 3:3, :Row) -3-element Array{Int64,1}: - 1 - 1 - 3 - -julia> DataFrames.getprintedwidth(maxwidths) -15 ``` """ function getprintedwidth(maxwidths::Vector{Int}) @@ -260,23 +246,6 @@ contains columns 4-5. - `splitcols::Bool`: Whether to split printing in chunks of columns fitting the screen width rather than printing all columns in the same block. - `availablewidth::Int`: The available width in the REPL. - -# Examples -```jldoctest -julia> using DataFrames - -julia> df = DataFrame(A = 1:3, B = ["x", "yy", "z"]); - -julia> maxwidths = DataFrames.getmaxwidths(df, stdout, 1:1, 3:3, :Row) -3-element Array{Int64,1}: - 1 - 1 - 3 - -julia> DataFrames.getchunkbounds(maxwidths, true, displaysize()[2]) -2-element Array{Int64,1}: - 0 - 2 ``` """ function getchunkbounds(maxwidths::Vector{Int}, @@ -328,6 +297,7 @@ required for printing have been precomputed. - `leftcol::Int`: The index of the first column in a chunk to be rendered. - `rightcol::Int`: The index of the last column in a chunk to be rendered. - `rowid`: Used to handle showing `DataFrameRow`. +- `buffer`: buffer passed around to avoid reallocations in `ourstrwidth` # Examples ```jldoctest @@ -346,7 +316,8 @@ function showrowindices(io::IO, maxwidths::Vector{Int}, leftcol::Int, rightcol::Int, - rowid) + rowid, + buffer) rowmaxwidth = maxwidths[end] for i in rowindices @@ -366,17 +337,15 @@ function showrowindices(io::IO, strlen = 0 if isassigned(df[!, j], i) s = df[i, j] - strlen = ourstrwidth(io, s) - if ismissing(s) - printstyled(io, s, color=:light_black) - elseif s === nothing - strlen = 0 + strlen = ourstrwidth(io, s, buffer) + if ismissing(s) || isnothing(s) || s isa SHOW_TABULAR_TYPES + ourshow(io, s, styled=true) else ourshow(io, s) end else - strlen = ourstrwidth(io, Base.undef_ref_str) - ourshow(io, Base.undef_ref_str) + strlen = ourstrwidth(io, "#undef", buffer) + ourshow(io, "#undef", styled=true) end padding = maxwidths[j] - strlen for _ in 1:padding @@ -402,12 +371,13 @@ end rowindices1::AbstractVector{Int}, rowindices2::AbstractVector{Int}, maxwidths::Vector{Int}, - splitcols::Bool = false, - allcols::Bool = false, - rowlabel::Symbol = :Row, - displaysummary::Bool = true, - eltypes::Bool = true, - rowid=nothing) + splitcols::Bool, + allcols::Bool, + rowlabel::Symbol, + displaysummary::Bool, + eltypes::Bool, + rowid, + buffer) Render a subset of rows (possibly in chunks) of an `AbstractDataFrame` to an I/O stream. @@ -437,6 +407,7 @@ NOTE: The value of `maxwidths[end]` must be the string width of - `eltypes::Bool = true`: Whether to print the column type under the column name in the heading. Defaults to `true`. - `rowid = nothing`: Used to handle showing `DataFrameRow` +- `buffer`: buffer passed around to avoid reallocations in `ourstrwidth` # Examples julia> using DataFrames @@ -459,12 +430,14 @@ function showrows(io::IO, rowindices1::AbstractVector{Int}, rowindices2::AbstractVector{Int}, maxwidths::Vector{Int}, - splitcols::Bool = false, - allcols::Bool = false, - rowlabel::Symbol = :Row, - displaysummary::Bool = true, - eltypes::Bool = true, - rowid=nothing) + splitcols::Bool, + allcols::Bool, + rowlabel::Symbol, + displaysummary::Bool, + eltypes::Bool, + rowid, + buffer) + ncols = size(df, 2) if isempty(rowindices1) @@ -490,7 +463,7 @@ function showrows(io::IO, # Print column names @printf io "│ %s" rowlabel - padding = rowmaxwidth - ourstrwidth(io, rowlabel) + padding = rowmaxwidth - ourstrwidth(io, rowlabel, buffer) for itr in 1:padding write(io, ' ') end @@ -498,7 +471,7 @@ function showrows(io::IO, for j in leftcol:rightcol s = _names(df)[j] ourshow(io, s) - padding = maxwidths[j] - ourstrwidth(io, s) + padding = maxwidths[j] - ourstrwidth(io, s, buffer) for itr in 1:padding write(io, ' ') end @@ -520,7 +493,7 @@ function showrows(io::IO, for j in leftcol:rightcol s = compacttype(eltype(df[!, j]), maxwidths[j], false) printstyled(io, s, color=:light_black) - padding = maxwidths[j] - ourstrwidth(io, s) + padding = maxwidths[j] - ourstrwidth(io, s, buffer) for itr in 1:padding write(io, ' ') end @@ -557,7 +530,7 @@ function showrows(io::IO, maxwidths, leftcol, rightcol, - rowid) + rowid, buffer) if !isempty(rowindices2) print(io, "\n⋮\n") @@ -567,7 +540,7 @@ function showrows(io::IO, maxwidths, leftcol, rightcol, - rowid) + rowid, buffer) end # Print newlines to separate chunks @@ -589,6 +562,10 @@ function _show(io::IO, eltypes::Bool = true, rowid=nothing) _check_consistency(df) + + # we will pass around this buffer to avoid its reallocation in ourstrwidth + buffer = IOBuffer(Vector{UInt8}(undef, 80), read=true, write=true) + nrows = size(df, 1) if rowid !== nothing if size(df, 2) == 0 @@ -608,7 +585,7 @@ function _show(io::IO, rowindices1 = 1:bound rowindices2 = max(bound + 1, nrows - nrowssubset + 1):nrows end - maxwidths = getmaxwidths(df, io, rowindices1, rowindices2, rowlabel, rowid, eltypes) + maxwidths = getmaxwidths(df, io, rowindices1, rowindices2, rowlabel, rowid, eltypes, buffer) width = getprintedwidth(maxwidths) showrows(io, df, @@ -620,7 +597,7 @@ function _show(io::IO, rowlabel, summary, eltypes, - rowid) + rowid, buffer) return end From 46af00a45cc74fa0a4c6779b966fffd301869f2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 20 Apr 2020 16:46:21 +0200 Subject: [PATCH 2/7] fix rough edge cases --- src/abstractdataframe/io.jl | 12 ++- src/abstractdataframe/show.jl | 2 +- test/io.jl | 157 ++++++++++++++++++++++++++++++---- test/show.jl | 12 +-- 4 files changed, 155 insertions(+), 28 deletions(-) diff --git a/src/abstractdataframe/io.jl b/src/abstractdataframe/io.jl index ac67bc0bfd..361c4d56b6 100644 --- a/src/abstractdataframe/io.jl +++ b/src/abstractdataframe/io.jl @@ -82,6 +82,10 @@ end function _show(io::IO, ::MIME"text/html", df::AbstractDataFrame; summary::Bool=true, eltypes::Bool=true, rowid::Union{Int,Nothing}=nothing) _check_consistency(df) + + # we will pass around this buffer to avoid its reallocation in ourstrwidth + buffer = IOBuffer(Vector{UInt8}(undef, 80), read=true, write=true) + if rowid !== nothing if size(df, 2) == 0 rowid = nothing @@ -94,7 +98,7 @@ function _show(io::IO, ::MIME"text/html", df::AbstractDataFrame; if get(io, :limit, false) tty_rows, tty_cols = displaysize(io) mxrow = min(mxrow, tty_rows) - maxwidths = getmaxwidths(df, io, 1:mxrow, 0:-1, :X) .+ 2 + maxwidths = getmaxwidths(df, io, 1:mxrow, 0:-1, :X, nothing, true, buffer) .+ 2 mxcol = min(mxcol, searchsortedfirst(cumsum(maxwidths), tty_cols)) end @@ -248,6 +252,10 @@ end function _show(io::IO, ::MIME"text/latex", df::AbstractDataFrame; eltypes::Bool=true, rowid=nothing) _check_consistency(df) + + # we will pass around this buffer to avoid its reallocation in ourstrwidth + buffer = IOBuffer(Vector{UInt8}(undef, 80), read=true, write=true) + if rowid !== nothing if size(df, 2) == 0 rowid = nothing @@ -260,7 +268,7 @@ function _show(io::IO, ::MIME"text/latex", df::AbstractDataFrame; if get(io, :limit, false) tty_rows, tty_cols = get(io, :displaysize, displaysize(io)) mxrow = min(mxrow, tty_rows) - maxwidths = getmaxwidths(df, io, 1:mxrow, 0:-1, :X) .+ 2 + maxwidths = getmaxwidths(df, io, 1:mxrow, 0:-1, :X, nothing, true, buffer) .+ 2 mxcol = min(mxcol, searchsortedfirst(cumsum(maxwidths), tty_cols)) end diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index b53673d57c..1f4213e90d 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -165,7 +165,7 @@ function getmaxwidths(df::AbstractDataFrame, buffer) maxwidths = Vector{Int}(undef, size(df, 2) + 1) - undefstrwidth = ourstrwidth(io, Base.undef_ref_str, buffer) + undefstrwidth = ourstrwidth(io, "#undef", buffer) j = 1 for (name, col) in pairs(eachcol(df)) diff --git a/test/io.jl b/test/io.jl index cd0c67994d..210353fa4b 100644 --- a/test/io.jl +++ b/test/io.jl @@ -17,10 +17,10 @@ using Test, DataFrames, CategoricalArrays, Dates \t\\hline \t& $(Int) & String & String & Float64? & Cat…? & String\\\\ \t\\hline - \t1 & 1 & \\\$10.0 & A & 1.0 & a & \\#undef \\\\ - \t2 & 2 & M\\&F & B & 2.0 & & \\#undef \\\\ - \t3 & 3 & A\\textasciitilde{}B & C & & c & \\#undef \\\\ - \t4 & 4 & \\textbackslash{}\\textbackslash{}alpha & S & 3.0 & d & \\#undef \\\\ + \t1 & 1 & \\\$10.0 & A & 1.0 & a & \\emph{\\#undef} \\\\ + \t2 & 2 & M\\&F & B & 2.0 & \\emph{missing} & \\emph{\\#undef} \\\\ + \t3 & 3 & A\\textasciitilde{}B & C & \\emph{missing} & c & \\emph{\\#undef} \\\\ + \t4 & 4 & \\textbackslash{}\\textbackslash{}alpha & S & 3.0 & d & \\emph{\\#undef} \\\\ \\end{tabular} """ @test repr(MIME("text/latex"), df) == str @@ -53,7 +53,7 @@ end "StringFloat64?" * "

2 rows × 2 columns

" * "1Suzy1.5" * - "2Amirmissing" + "2Amirmissing" df = DataFrame(Fish = Vector{String}(undef, 2), Mass = [1.5, missing]) io = IOBuffer() @@ -63,8 +63,8 @@ end "FishMass" * "StringFloat64?" * "

2 rows × 2 columns

" * - "1#undef1.5" * - "2#undefmissing" + "1#undef1.5" * + "2#undefmissing" io = IOBuffer() show(io, "text/html", eachrow(df)) @@ -73,8 +73,8 @@ end "" * "" * - "" * - "
" * "FishMass
StringFloat64?
1#undef1.5
2#undefmissing
" + "1#undef1.5" * + "2#undefmissing" io = IOBuffer() show(io, "text/html", eachcol(df)) @@ -83,8 +83,8 @@ end "" * "" * - "" * - "
" * "FishMass
StringFloat64?
1#undef1.5
2#undefmissing
" + "1#undef1.5" * + "2#undefmissing" io = IOBuffer() show(io, "text/html", df[1, :]) @@ -92,7 +92,7 @@ end @test str == "

DataFrameRow (2 columns)

" * "" * "" * - "
FishMass
StringFloat64?
1#undef1.5
" + "#undef1.5" io = IOBuffer() show(io, MIME"text/html"(), df, summary=false) @@ -100,8 +100,8 @@ end @test str == "" * "" * - "" * - "
" * "FishMass
StringFloat64?
1#undef1.5
2#undefmissing
" + "1#undef1.5" * + "2#undefmissing" io = IOBuffer() show(io, MIME"text/html"(), eachrow(df), summary=false) @@ -109,8 +109,8 @@ end @test str == "" * "" * - "" * - "
" * "FishMass
StringFloat64?
1#undef1.5
2#undefmissing
" + "1#undef1.5" * + "2#undefmissing" io = IOBuffer() show(io, MIME"text/html"(), eachcol(df), summary=false) @@ -118,15 +118,15 @@ end @test str == "" * "" * - "" * - "
" * "FishMass
StringFloat64?
1#undef1.5
2#undefmissing
" + "1#undef1.5" * + "2#undefmissing" io = IOBuffer() show(io, MIME"text/html"(), df[1, :], summary=false) str = String(take!(io)) @test str == "" * "" * - "
FishMass
StringFloat64?
1#undef1.5
" + "1#undef1.5" @test_throws ArgumentError DataFrames._show(stdout, MIME("text/html"), DataFrame(ones(2,2)), rowid=10) @@ -398,4 +398,123 @@ end end end +@testset "improved printing of special types" begin + df = DataFrame(A=1:9, B = Vector{Any}(undef, 9)) + df.B[1:8] = [df, # DataFrame + df[1,:], # DataFrameRow + view(df,1:1, :), # SubDataFrame + eachrow(df), # DataFrameColumns + eachcol(df), # DataFrameRows + groupby(df, :A),missing,nothing] # GroupedDataFrame + + io = IOBuffer() + show(io, df) + str = String(take!(io)) + + @test str == """ + 9×2 DataFrame + │ Row │ A │ B │ + │ │ Int64 │ Any │ + ├─────┼───────┼────────────────────────────────────────────────┤ + │ 1 │ 1 │ 9×2 DataFrame │ + │ 2 │ 2 │ 2-element DataFrameRow │ + │ 3 │ 3 │ 1×2 SubDataFrame │ + │ 4 │ 4 │ 9-element DataFrameRows │ + │ 5 │ 5 │ 2-element DataFrameColumns │ + │ 6 │ 6 │ GroupedDataFrame with 9 groups based on key: A │ + │ 7 │ 7 │ missing │ + │ 8 │ 8 │ nothing │ + │ 9 │ 9 │ #undef │""" + + + io = IOBuffer() + show(IOContext(io, :color => true), df) + str = String(take!(io)) + @test str == """ + 9×2 DataFrame + │ Row │ A │ B │ + │ │ \e[90mInt64\e[39m │ \e[90mAny\e[39m │ + ├─────┼───────┼────────────────────────────────────────────────┤ + │ 1 │ 1 │ \e[90m9×2 DataFrame\e[39m │ + │ 2 │ 2 │ \e[90m2-element DataFrameRow\e[39m │ + │ 3 │ 3 │ \e[90m1×2 SubDataFrame\e[39m │ + │ 4 │ 4 │ \e[90m9-element DataFrameRows\e[39m │ + │ 5 │ 5 │ \e[90m2-element DataFrameColumns\e[39m │ + │ 6 │ 6 │ \e[90mGroupedDataFrame with 9 groups based on key: A\e[39m │ + │ 7 │ 7 │ \e[90mmissing\e[39m │ + │ 8 │ 8 │ \e[90mnothing\e[39m │ + │ 9 │ 9 │ \e[90m#undef\e[39m │""" + + + io = IOBuffer() + show(io, MIME("text/html"), df) + str = String(take!(io)) + @test str == "" * + "" * + "

9 rows × 2 columns

" * + "" * + "" * + "" * + "" * + "" * + "" * + "" * + "" * + "
AB
Int64Any
119×2 DataFrame
222-element DataFrameRow
331×2 SubDataFrame
449-element DataFrameRows
552-element DataFrameColumns
66GroupedDataFrame with 9 groups based on key: A
77missing
88nothing
99#undef
" + + io = IOBuffer() + show(io, MIME("text/latex"), df) + str = String(take!(io)) + @test str == """ + \\begin{tabular}{r|cc} + \t& A & B\\\\ + \t\\hline + \t& Int64 & Any\\\\ + \t\\hline + \t1 & 1 & \\emph{9×2 DataFrame} \\\\ + \t2 & 2 & \\emph{2-element DataFrameRow} \\\\ + \t3 & 3 & \\emph{1×2 SubDataFrame} \\\\ + \t4 & 4 & \\emph{9-element DataFrameRows} \\\\ + \t5 & 5 & \\emph{2-element DataFrameColumns} \\\\ + \t6 & 6 & \\emph{GroupedDataFrame with 9 groups based on key: A} \\\\ + \t7 & 7 & \\emph{missing} \\\\ + \t8 & 8 & \\emph{nothing} \\\\ + \t9 & 9 & \\emph{\\#undef} \\\\ + \\end{tabular} + """ + + @test_throws UndefRefError show(io, MIME("text/csv"), df) + @test_throws UndefRefError show(io, MIME("text/tab-separated-values"), df) + + io = IOBuffer() + show(io, MIME("text/csv"), df[1:end-1, :]) + str = String(take!(io)) + @test str == """ + "A","B" + 1,"9×2 DataFrame" + 2,"2-element DataFrameRow" + 3,"1×2 SubDataFrame" + 4,"9-element DataFrameRows" + 5,"2-element DataFrameColumns" + 6,"GroupedDataFrame with 9 groups based on key: A" + 7,missing + 8,nothing + """ + + io = IOBuffer() + show(io, MIME("text/tab-separated-values"), df[1:end-1, :]) + str = String(take!(io)) + @test str == """ + "A"\t"B" + 1\t"9×2 DataFrame" + 2\t"2-element DataFrameRow" + 3\t"1×2 SubDataFrame" + 4\t"9-element DataFrameRows" + 5\t"2-element DataFrameColumns" + 6\t"GroupedDataFrame with 9 groups based on key: A" + 7\tmissing + 8\tnothing + """ +end + end # module diff --git a/test/show.jl b/test/show.jl index 8f68fa47d0..8453dc64db 100644 --- a/test/show.jl +++ b/test/show.jl @@ -269,12 +269,12 @@ end df_nothing = DataFrame(A = [1.0, 2.0, 3.0], B = ["g", "g", nothing]) @test sprint(show, df_nothing) == """ 3×2 DataFrame - │ Row │ A │ B │ - │ │ Float64 │ Union… │ - ├─────┼─────────┼────────┤ - │ 1 │ 1.0 │ g │ - │ 2 │ 2.0 │ g │ - │ 3 │ 3.0 │ │""" + │ Row │ A │ B │ + │ │ Float64 │ Union… │ + ├─────┼─────────┼─────────┤ + │ 1 │ 1.0 │ g │ + │ 2 │ 2.0 │ g │ + │ 3 │ 3.0 │ nothing │""" end @testset "Test correct width computation" begin From e4fbc94b2f74bf5250fa0020b3586f242ca3fc96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 20 Apr 2020 19:59:08 +0200 Subject: [PATCH 3/7] update one more test --- test/dataframerow.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/dataframerow.jl b/test/dataframerow.jl index 6a6ffe6928..4274540015 100644 --- a/test/dataframerow.jl +++ b/test/dataframerow.jl @@ -420,7 +420,7 @@ end │ Row │ a │ b │ │ │ Nothing │ $(Int) │ ├─────┼─────────┼───────┤ - │ 1 │ │ 1 │""" + │ 1 │ nothing │ 1 │""" df = DataFrame(a=1:3, b=["a", "b", "c"], c=Int64[1,0,1]) From cd038ce3d52acd9c4e1623c06a2aa7b110b9e15b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 20 Apr 2020 23:53:00 +0200 Subject: [PATCH 4/7] fix 32-bit machine error --- test/io.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/io.jl b/test/io.jl index 210353fa4b..471c417a1a 100644 --- a/test/io.jl +++ b/test/io.jl @@ -399,7 +399,7 @@ end end @testset "improved printing of special types" begin - df = DataFrame(A=1:9, B = Vector{Any}(undef, 9)) + df = DataFrame(A=Int64.(1:9), B = Vector{Any}(undef, 9)) df.B[1:8] = [df, # DataFrame df[1,:], # DataFrameRow view(df,1:1, :), # SubDataFrame From 802a7490d58e5f615cd36df13e565ff2a2b4adf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 22 Apr 2020 10:55:39 +0200 Subject: [PATCH 5/7] Apply suggestions from code review Co-Authored-By: Milan Bouchet-Valat --- src/abstractdataframe/show.jl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index 1f4213e90d..6e75ab84cc 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -7,7 +7,7 @@ Base.summary(io::IO, df::AbstractDataFrame) = print(io, summary(df)) Determine the number of characters that would be used to print a value. """ -function ourstrwidth(io::IO, x::Any, buffer::Base.GenericIOBuffer{Array{UInt8,1}}) +function ourstrwidth(io::IO, x::Any, buffer::IOBuffer) truncate(buffer, 0) ourshow(IOContext(buffer, :compact=>get(io, :compact, true)), x) textwidth(String(take!(buffer))) @@ -122,9 +122,9 @@ end rowindices1::AbstractVector{Int}, rowindices2::AbstractVector{Int}, rowlabel::Symbol, - rowid, + rowid::Union{Integer, Nothing}, show_eltype::Bool, - buffer) + buffer::IOBuffer) Calculate, for each column of an AbstractDataFrame, the maximum string width used to render the name of that column, its type, and the @@ -160,9 +160,9 @@ function getmaxwidths(df::AbstractDataFrame, rowindices1::AbstractVector{Int}, rowindices2::AbstractVector{Int}, rowlabel::Symbol, - rowid, + rowid::Union{Integer, Nothing}, show_eltype::Bool, - buffer) + buffer::IOBuffer) maxwidths = Vector{Int}(undef, size(df, 2) + 1) undefstrwidth = ourstrwidth(io, "#undef", buffer) @@ -316,8 +316,8 @@ function showrowindices(io::IO, maxwidths::Vector{Int}, leftcol::Int, rightcol::Int, - rowid, - buffer) + rowid::Union{Integer, Nothing}, + buffer::IOBuffer) rowmaxwidth = maxwidths[end] for i in rowindices @@ -376,8 +376,8 @@ end rowlabel::Symbol, displaysummary::Bool, eltypes::Bool, - rowid, - buffer) + rowid::Union{Integer, Nothing}, + buffer::IOBuffer) Render a subset of rows (possibly in chunks) of an `AbstractDataFrame` to an I/O stream. @@ -406,8 +406,8 @@ NOTE: The value of `maxwidths[end]` must be the string width of contents of the renderable rows? Defaults to `true`. - `eltypes::Bool = true`: Whether to print the column type under the column name in the heading. Defaults to `true`. -- `rowid = nothing`: Used to handle showing `DataFrameRow` -- `buffer`: buffer passed around to avoid reallocations in `ourstrwidth` +- `rowid::Union{Integer, Nothing} = nothing`: Used to handle showing `DataFrameRow` +- `buffer::IOBuffer`: buffer passed around to avoid reallocations in `ourstrwidth` # Examples julia> using DataFrames @@ -435,8 +435,8 @@ function showrows(io::IO, rowlabel::Symbol, displaysummary::Bool, eltypes::Bool, - rowid, - buffer) + rowid::Union{Integer, Nothing}, + buffer::IOBuffer) ncols = size(df, 2) From 947421110e309664021faf83a28819aba9ee7645 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 22 Apr 2020 10:56:09 +0200 Subject: [PATCH 6/7] changes after code review --- src/abstractdataframe/show.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index 1f4213e90d..24b4aeddd1 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -32,7 +32,7 @@ function ourshow(io::IO, x::Any; styled::Bool=false) sx = sprint(show, x, context=io_ctx) end - # + # strings should have " stripped here if x isa AbstractString @assert sx[1] == sx[end] == '"' sx = escape_string(chop(sx, head=1, tail=1), "") @@ -279,7 +279,8 @@ end maxwidths::Vector{Int}, leftcol::Int, rightcol::Int, - rowid::Union{Int,Nothing}) + rowid::Union{Int,Nothing}, + buffer::IOBuffer) Render a subset of rows and columns of an `AbstractDataFrame` to an I/O stream. For chunked printing, this function is used to print a From 516603078572a2aa18ce5ee38179604f3ef2a310 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 24 Apr 2020 12:20:42 +0200 Subject: [PATCH 7/7] make nothing print empty string --- src/abstractdataframe/io.jl | 4 ---- src/abstractdataframe/show.jl | 4 ++-- test/dataframerow.jl | 2 +- test/io.jl | 8 ++++---- test/show.jl | 12 ++++++------ 5 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/abstractdataframe/io.jl b/src/abstractdataframe/io.jl index 361c4d56b6..24564baeb1 100644 --- a/src/abstractdataframe/io.jl +++ b/src/abstractdataframe/io.jl @@ -142,8 +142,6 @@ function _show(io::IO, ::MIME"text/html", df::AbstractDataFrame; cell_val = df[row, column_name] if ismissing(cell_val) write(io, "missing") - elseif isnothing(cell_val) - write(io, "nothing") elseif cell_val isa SHOW_TABULAR_TYPES write(io, "") cell = sprint(ourshow, cell_val) @@ -303,8 +301,6 @@ function _show(io::IO, ::MIME"text/latex", df::AbstractDataFrame; cell = df[row,col] if ismissing(cell) print(io, "\\emph{missing}") - elseif isnothing(cell) - print(io, "\\emph{nothing}") elseif cell isa SHOW_TABULAR_TYPES print(io, "\\emph{") print(io, latex_escape(sprint(ourshow, cell, context=io))) diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl index f2eeb31b2b..931171be21 100644 --- a/src/abstractdataframe/show.jl +++ b/src/abstractdataframe/show.jl @@ -51,7 +51,7 @@ const SHOW_TABULAR_TYPES = Union{AbstractDataFrame, DataFrameRow, DataFrameRows, ourshow(io::IO, x::AbstractString) = escape_string(io, x, "") ourshow(io::IO, x::CategoricalValue{<:AbstractString}) = escape_string(io, get(x), "") ourshow(io::IO, x::Symbol) = ourshow(io, string(x)) -ourshow(io::IO, x::Nothing; styled::Bool=false) = ourshow(io, "nothing", styled=styled) +ourshow(io::IO, x::Nothing; styled::Bool=false) = ourshow(io, "", styled=styled) ourshow(io::IO, x::SHOW_TABULAR_TYPES; styled::Bool=false) = ourshow(io, summary(x), styled=styled) @@ -339,7 +339,7 @@ function showrowindices(io::IO, if isassigned(df[!, j], i) s = df[i, j] strlen = ourstrwidth(io, s, buffer) - if ismissing(s) || isnothing(s) || s isa SHOW_TABULAR_TYPES + if ismissing(s) || s isa SHOW_TABULAR_TYPES ourshow(io, s, styled=true) else ourshow(io, s) diff --git a/test/dataframerow.jl b/test/dataframerow.jl index 4274540015..6a6ffe6928 100644 --- a/test/dataframerow.jl +++ b/test/dataframerow.jl @@ -420,7 +420,7 @@ end │ Row │ a │ b │ │ │ Nothing │ $(Int) │ ├─────┼─────────┼───────┤ - │ 1 │ nothing │ 1 │""" + │ 1 │ │ 1 │""" df = DataFrame(a=1:3, b=["a", "b", "c"], c=Int64[1,0,1]) diff --git a/test/io.jl b/test/io.jl index 471c417a1a..0e9ea5bb64 100644 --- a/test/io.jl +++ b/test/io.jl @@ -423,7 +423,7 @@ end │ 5 │ 5 │ 2-element DataFrameColumns │ │ 6 │ 6 │ GroupedDataFrame with 9 groups based on key: A │ │ 7 │ 7 │ missing │ - │ 8 │ 8 │ nothing │ + │ 8 │ 8 │ │ │ 9 │ 9 │ #undef │""" @@ -442,7 +442,7 @@ end │ 5 │ 5 │ \e[90m2-element DataFrameColumns\e[39m │ │ 6 │ 6 │ \e[90mGroupedDataFrame with 9 groups based on key: A\e[39m │ │ 7 │ 7 │ \e[90mmissing\e[39m │ - │ 8 │ 8 │ \e[90mnothing\e[39m │ + │ 8 │ 8 │ │ │ 9 │ 9 │ \e[90m#undef\e[39m │""" @@ -459,7 +459,7 @@ end "552-element DataFrameColumns" * "66GroupedDataFrame with 9 groups based on key: A" * "77missing" * - "88nothing" * + "88" * "99#undef" io = IOBuffer() @@ -478,7 +478,7 @@ end \t5 & 5 & \\emph{2-element DataFrameColumns} \\\\ \t6 & 6 & \\emph{GroupedDataFrame with 9 groups based on key: A} \\\\ \t7 & 7 & \\emph{missing} \\\\ - \t8 & 8 & \\emph{nothing} \\\\ + \t8 & 8 & \\\\ \t9 & 9 & \\emph{\\#undef} \\\\ \\end{tabular} """ diff --git a/test/show.jl b/test/show.jl index 8453dc64db..8f68fa47d0 100644 --- a/test/show.jl +++ b/test/show.jl @@ -269,12 +269,12 @@ end df_nothing = DataFrame(A = [1.0, 2.0, 3.0], B = ["g", "g", nothing]) @test sprint(show, df_nothing) == """ 3×2 DataFrame - │ Row │ A │ B │ - │ │ Float64 │ Union… │ - ├─────┼─────────┼─────────┤ - │ 1 │ 1.0 │ g │ - │ 2 │ 2.0 │ g │ - │ 3 │ 3.0 │ nothing │""" + │ Row │ A │ B │ + │ │ Float64 │ Union… │ + ├─────┼─────────┼────────┤ + │ 1 │ 1.0 │ g │ + │ 2 │ 2.0 │ g │ + │ 3 │ 3.0 │ │""" end @testset "Test correct width computation" begin