From 0d1fe72d538d535c9b3b793fa868b93119ed0688 Mon Sep 17 00:00:00 2001
From: Milan Bouchet-Valat
Date: Sat, 2 Mar 2019 11:45:47 +0100
Subject: [PATCH] Skip missing values in describe (#470)

Compute the summary statistics on the non-missing entries only:
`summarystats` now drops missing values before taking the mean and the
quantiles, and reports the number of dropped entries (`n - ns`) as the
missing count. `show` therefore always prints the statistics instead of
"(All summary stats are missing)".

The quantiles fall back to NaN only when no non-missing entry is left
(`ns == 0`). Testing `m == 0 || n == 0` instead would blank the
quantiles of any input whose mean is zero, and would call `quantile!`
on an empty vector for a non-empty input made only of missing values.

`quantile!` is used only on the freshly collected copy `s`, so the
caller's array is never reordered.
---
 src/scalarstats.jl | 30 ++++++++++++++----------------
 test/misc.jl       |  7 ++++++-
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/src/scalarstats.jl b/src/scalarstats.jl
index a885e6493115e..5a53b34f225a2 100644
--- a/src/scalarstats.jl
+++ b/src/scalarstats.jl
@@ -562,17 +562,19 @@ median, 75th percentile, and maxmimum.
 function summarystats(a::AbstractArray{T}) where T<:Union{Real,Missing}
     # `mean` doesn't fail on empty input but rather returns `NaN`, so we can use the
     # return type to populate the `SummaryStats` structure.
-    m = mean(a)
+    s = T >: Missing ? collect(skipmissing(a)) : a
+    m = mean(s)
     R = typeof(m)
     n = length(a)
-    qs = if n == 0
+    ns = length(s)
+    qs = if ns == 0
         R[NaN, NaN, NaN, NaN, NaN]
-    elseif ismissing(m)
-        [missing, missing, missing, missing, missing]
+    elseif T >: Missing
+        quantile!(s, [0.00, 0.25, 0.50, 0.75, 1.00])
     else
-        quantile(a, [0.00, 0.25, 0.50, 0.75, 1.00])
+        quantile(s, [0.00, 0.25, 0.50, 0.75, 1.00])
     end
-    SummaryStats{R}(m, qs..., n, count(ismissing, a))
+    SummaryStats{R}(m, qs..., n, n - ns)
 end
 
 function Base.show(io::IO, ss::SummaryStats)
@@ -580,16 +582,12 @@ function Base.show(io::IO, ss::SummaryStats)
     @printf(io, "Length: %i\n", ss.nobs)
     ss.nobs > 0 || return
     @printf(io, "Missing Count: %i\n", ss.nmiss)
-    if ss.nmiss > 0
-        println(io, "(All summary stats are missing)")
-    else
-        @printf(io, "Mean: %.6f\n", ss.mean)
-        @printf(io, "Minimum: %.6f\n", ss.min)
-        @printf(io, "1st Quartile: %.6f\n", ss.q25)
-        @printf(io, "Median: %.6f\n", ss.median)
-        @printf(io, "3rd Quartile: %.6f\n", ss.q75)
-        @printf(io, "Maximum: %.6f\n", ss.max)
-    end
+    @printf(io, "Mean: %.6f\n", ss.mean)
+    @printf(io, "Minimum: %.6f\n", ss.min)
+    @printf(io, "1st Quartile: %.6f\n", ss.q25)
+    @printf(io, "Median: %.6f\n", ss.median)
+    @printf(io, "3rd Quartile: %.6f\n", ss.q75)
+    @printf(io, "Maximum: %.6f\n", ss.max)
 end
 
 
diff --git a/test/misc.jl b/test/misc.jl
index 7c09b32b7f542..6773d2c2a58d0 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -56,7 +56,12 @@ describe(io, Union{Float32,Missing}[1.0, 4.5, missing, missing, 33.1])
     Summary Stats:
     Length: 5
     Missing Count: 2
-    (All summary stats are missing)
+    Mean: 12.866666
+    Minimum: 1.000000
+    1st Quartile: 2.750000
+    Median: 4.500000
+    3rd Quartile: 18.799999
+    Maximum: 33.099998
     Type: $(Union{Float32,Missing})
     """
 