Skip to content

Commit

Permalink
Improve printing CoefTable (JuliaLang#481)
Browse files Browse the repository at this point in the history
Rely on printing code from Base to align numbers on the decimal separator.
Unfortunately, we cannot treat column names as a normal row since names would
be left-aligned with the decimal separator, which wastes space and is less clean.
Instead, adjust the alignment manually so that columns are wide enough to contain
names.

Only convert values to PValue before printing, so that users can still access the contents
of columns as normal numbers. This is convenient in particular for testing in packages.

Print test statistics with 2 decimals.

Add horizontal lines.
  • Loading branch information
nalimilan authored Apr 10, 2019
1 parent b7fe44c commit 81a07a1
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 57 deletions.
95 changes: 58 additions & 37 deletions src/statmodels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -358,36 +358,41 @@ function params! end

## coefficient tables with specialized show method

## Nms are the coefficient names, corresponding to rows in the table
mutable struct CoefTable
cols::Vector
colnms::Vector
rownms::Vector
function CoefTable(cols::Vector,colnms::Vector,rownms::Vector)
pvalcol::Int
teststatcol::Int
function CoefTable(cols::Vector,colnms::Vector,rownms::Vector,
pvalcol::Int=0,teststatcol::Int=0)
nc = length(cols)
nrs = map(length,cols)
nr = nrs[1]
length(colnms) in [0,nc] || error("colnms should have length 0 or $nc")
length(rownms) in [0,nr] || error("rownms should have length 0 or $nr")
all(nrs .== nr) || error("Elements of cols should have equal lengths, but got $nrs")
new(cols,colnms,rownms)
length(colnms) in [0,nc] || throw(ArgumentError("colnms should have length 0 or $nc"))
length(rownms) in [0,nr] || throw(ArgumentError("rownms should have length 0 or $nr"))
all(nrs .== nr) || throw(ArgumentError("Elements of cols should have equal lengths, but got $nrs"))
pvalcol in 0:nc || throw(ArgumentError("pvalcol should be between 0 and $nc"))
teststatcol in 0:nc || throw(ArgumentError("teststatcol should be between 0 and $nc"))
new(cols,colnms,rownms,pvalcol,teststatcol)
end

function CoefTable(mat::Matrix,colnms::Vector,rownms::Vector,pvalcol::Int=0)
function CoefTable(mat::Matrix,colnms::Vector,rownms::Vector,
pvalcol::Int=0,teststatcol::Int=0)
nc = size(mat,2)
cols = Any[mat[:, i] for i in 1:nc]
if pvalcol != 0 # format the p-values column
cols[pvalcol] = [PValue(cols[pvalcol][j])
for j in eachindex(cols[pvalcol])]
end
CoefTable(cols,colnms,rownms)
CoefTable(cols,colnms,rownms,pvalcol,teststatcol)
end
end

mutable struct PValue
v::Number
function PValue(v::Number)
0. <= v <= 1. || isnan(v) || error("p-values must be in [0.,1.]")
"""
    PValue(v::Real)

Wrap `v` as a p-value so that it is shown using 6 characters, either with the
standard 0.XXXX representation or as <Xe-YY.

The constructor accepts `v` only when it lies between 0 and 1 (inclusive) or
is `NaN`; any other value raises an error.
"""
struct PValue
    v::Real
    function PValue(v::Real)
        # NaN is let through so that undefined p-values can still be wrapped.
        if !(0 <= v <= 1 || isnan(v))
            error("p-values must be in [0; 1]")
        end
        return new(v)
    end
end
Expand All @@ -403,36 +408,52 @@ function show(io::IO, pv::PValue)
end
end

"""
    TestStat(v::Real)

Wrapper around a test statistic `v`. Its `show` method prints the wrapped
value using 2 decimal digits.
"""
struct TestStat <: Real
    v::Real
end

function show(io::IO, x::TestStat)
    # Fixed-point formatting with two decimals, e.g. 0.1326 is printed as 0.13.
    @printf(io, "%.2f", x.v)
end

"""
    NoQuote(s)

Wrapper around a string whose `show` method prints the raw contents, so that
no surrounding quotes appear in the output.
"""
struct NoQuote
    s::String
end

function show(io::IO, n::NoQuote)
    # `print` writes the string contents as-is, without quotes or escaping.
    print(io, n.s)
end

function show(io::IO, ct::CoefTable)
cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms;
nc = length(cols)
nr = length(cols[1])
if length(rownms) == 0
rownms = [lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr]
end
rnwidth = max(4,maximum([length(nm) for nm in rownms]) + 1)
rownms = [rpad(nm,rnwidth) for nm in rownms]
widths = [length(cn)::Int for cn in colnms]
str = String[isa(cols[j][i], AbstractString) ? cols[j][i] :
sprint(show, cols[j][i], context=:compact=>true) for i in 1:nr, j in 1:nc]
for j in 1:nc
for i in 1:nr
lij = length(str[i,j])
if lij > widths[j]
widths[j] = lij
end
end
mat = [j == 1 ? NoQuote(rownms[i]) :
j-1 == ct.pvalcol ? PValue(cols[j-1][i]) :
j-1 in ct.teststatcol ? TestStat(cols[j-1][i]) :
cols[j-1][i] isa AbstractString ? NoQuote(cols[j-1][i]) : cols[j-1][i]
for i in 1:nr, j in 1:nc+1]
# Code inspired by print_matrix in Base
io = IOContext(io, :compact=>true, :limit=>false)
A = Base.alignment(io, mat, 1:size(mat, 1), 1:size(mat, 2),
typemax(Int), typemax(Int), 3)
nmswidths = pushfirst!(length.(colnms), 0)
A = [nmswidths[i] > sum(A[i]) ? (A[i][1]+nmswidths[i]-sum(A[i]), A[i][2]) : A[i]
for i in 1:length(A)]
totwidth = sum(sum.(A)) + 2 * (length(A) - 1)
println(io, repeat('─', totwidth))
print(io, repeat(' ', sum(A[1])))
for j in 1:length(colnms)
print(io, " ", lpad(colnms[j], sum(A[j+1])))
end
widths .+= 1
println(io," " ^ rnwidth *
join([lpad(string(colnms[i]), widths[i]) for i = 1:nc], ""))
for i = 1:nr
print(io, rownms[i])
for j in 1:nc
print(io, lpad(str[i,j],widths[j]))
end
println(io)
println(io, '\n', repeat('─', totwidth))
for i in 1:size(mat, 1)
Base.print_matrix_row(io, mat, A, i, 1:size(mat, 2), " ")
i != size(mat, 1) && println(io)
end
print(io, '\n', repeat('─', totwidth))
nothing
end

"""
Expand Down
44 changes: 24 additions & 20 deletions test/statmodels.jl
Original file line number Diff line number Diff line change
@@ -1,28 +1,32 @@
using StatsBase
using Test, Random

Random.seed!(10)
v1 = rand(3)
v1 = [1.45666, -23.14, 1.56734e-13]
v2 = ["Good", "Great", "Bad"]
v3 = rand(Int8, 3)
v4 = [StatsBase.PValue(rand()./10000) for i in 1:3]
m = rand(3,4)
@test sprint(show, CoefTable(Any[v1, v2, v3, v4],
["Estimate", "Comments", "df", "p"],
["x1", "x2", "x3"])) == """
Estimate Comments df p
x1 0.112582 Good 88 <1e-4
x2 0.368314 Great -90 <1e-4
x3 0.344454 Bad -80 <1e-4
"""
v3 = [1, 56, 2]
v4 = [-12.56, 0.1326, 2.68e-16]
v5 = [0.12, 0.3467, 1.345e-16]
@test sprint(show, CoefTable(Any[v1, v2, v3, v4, v5],
["Estimate", "Comments", "df", "t", "p"],
["x1", "x2", "x3"], 5, 4)) == """
───────────────────────────────────────────────
Estimate Comments df t p
───────────────────────────────────────────────
x1 1.45666 Good 1 -12.56 0.1200
x2 -23.14 Great 56 0.13 0.3467
x3 1.56734e-13 Bad 2 0.00 <1e-15
───────────────────────────────────────────────"""

@test sprint(show, CoefTable(m, ["Estimate", "Stderror", "df", "p"],
["x1", "x2", "x3"], 4)) == """
Estimate Stderror df p
x1 0.819778 0.844007 0.923676 0.1717
x2 0.669931 0.67919 0.066098 0.4204
x3 0.453058 0.72525 0.999172 0.5567
"""
Random.seed!(10)
m = rand(3,4)
@test sprint(show, CoefTable(m, ["Estimate", "Stderror", "df", "p"], [], 4)) == """
──────────────────────────────────────────
Estimate Stderror df p
──────────────────────────────────────────
[1] 0.112582 0.0566454 0.381813 0.8198
[2] 0.368314 0.120781 0.815104 0.6699
[3] 0.344454 0.179574 0.242208 0.4531
──────────────────────────────────────────"""

@test sprint(show, StatsBase.PValue(1.0)) == "1.0000"
@test sprint(show, StatsBase.PValue(1e-1)) == "0.1000"
Expand Down

0 comments on commit 81a07a1

Please sign in to comment.