From 0234004977c7f886122ad873cec3b1cefd835844 Mon Sep 17 00:00:00 2001 From: rofinn Date: Tue, 30 Jul 2019 09:54:21 -0500 Subject: [PATCH 1/6] Add rowtable and columntable tests. --- test/runtests.jl | 70 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index e5cd3ac..581f5d8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -61,24 +61,58 @@ import Impute: # the original matrix @test_broken isequal(m, expected) end - @testset "DataFrame" begin - df = DataFrame( + + @testset "Tables" begin + orig = DataFrame( :sin => allowmissing(sin.(1.0:1.0:20.0)), :cos => allowmissing(sin.(1.0:1.0:20.0)), ) - df.sin[[2, 3, 7, 12, 19]] .= missing - df.cos[[4, 9]] .= missing - result = impute(df, DropVars(; context=ctx)) - expected = select(df, :cos) + orig.sin[[2, 3, 7, 12, 19]] .= missing + orig.cos[[4, 9]] .= missing - @test isequal(result, expected) - @test isequal(result, Impute.dropvars(df; context=ctx)) + @testset "DataFrame" begin + df = deepcopy(orig) + result = impute(df, DropVars(; context=ctx)) + expected = select(df, :cos) - Impute.dropvars!(df; context=ctx) - # The mutating test is broken because we need to making a copy of - # the original table - @test_broken isequal(df, expected) + @test isequal(result, expected) + @test isequal(result, Impute.dropvars(df; context=ctx)) + + Impute.dropvars!(df; context=ctx) + # The mutating test is broken because we need to making a copy of + # the original table + @test_broken isequal(df, expected) + end + + @testset "Column Table" begin + coltab = Tables.columntable(orig) + + result = impute(coltab, DropVars(; context=ctx)) + expected = Tables.columntable(Tables.select(coltab, :cos)) + + @test isequal(result, expected) + @test isequal(result, Impute.dropvars(coltab; context=ctx)) + + Impute.dropvars!(coltab; context=ctx) + # The mutating test is broken because we need to making a copy of + # the original table + @test_broken isequal(coltab, expected) + end + + @testset "Row Table" begin + rowtab = Tables.rowtable(orig) + result = impute(rowtab, DropVars(; context=ctx)) + expected = Tables.rowtable(Tables.select(rowtab, :cos)) + + @test isequal(result, expected) + @test isequal(result, Impute.dropvars(rowtab; context=ctx)) + + Impute.dropvars!(rowtab; context=ctx) + # The mutating test is broken because we need to making a copy of + # the original table + @test_broken isequal(rowtab, expected) + end end end end @@ -270,6 +304,18 @@ import Impute: @test all(!ismissing, result) end + @testset "Row Table" begin + result = Tables.rowtable(orig) |> + Impute.interp!(; context=ctx) |> + Impute.locf!() |> + Impute.nocb!() |> + Tables.matrix + + @test size(result) == size(orig) + # Confirm that we don't have any more missing values + @test all(!ismissing, result) + end + @testset "Matrix" begin data = Matrix(orig) result = Impute.interp(data; context=ctx) |> Impute.locf!() |> Impute.nocb!() From 8feb68eda311c54c2fa2718c0059eb161906ddbe Mon Sep 17 00:00:00 2001 From: rofinn Date: Tue, 30 Jul 2019 22:42:15 -0500 Subject: [PATCH 2/6] Added more drop tests for table types. --- Project.toml | 2 + src/imputors/drop.jl | 60 +++++------ test/runtests.jl | 237 +++++++++++++++++++++++++++---------------- 3 files changed, 173 insertions(+), 126 deletions(-) diff --git a/Project.toml b/Project.toml index b66b346..f23dcfd 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,9 @@ authors = ["Invenia Technical Computing"] version = "0.2.0" [deps] +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" diff --git a/src/imputors/drop.jl b/src/imputors/drop.jl index ab7a628..bbd5856 100644 --- a/src/imputors/drop.jl +++ b/src/imputors/drop.jl @@ -21,6 +21,9 @@ julia> impute(M, DropObs(; context=Context(; limit=1.0)); dims=2) 1.0 2.0 5.0 1.1 2.2 5.5 ``` + +WARNING: Observations can only be removed in-place for some input data types others +(e.g., matrices, array views, some tables) will require a copy/collect. """ struct DropObs <: Imputor context::AbstractContext @@ -30,12 +33,18 @@ end DropObs(; context=Context()) = DropObs(context) function impute!(data::AbstractVector, imp::DropObs) + parent(data) === data || return impute!(parent(data), imp) + imp.context() do c filter!(x -> !ismissing!(c, x), data) end + + return data end function impute!(data::AbstractMatrix, imp::DropObs; dims=1) + # parent(data) === data || return impute!(parent(data), imp) + imp.context() do c return filterobs(data; dims=dims) do obs !ismissing!(c, obs) @@ -43,10 +52,6 @@ function impute!(data::AbstractMatrix, imp::DropObs; dims=1) end end -# Deleting elements from subarrays doesn't work so we need to collect that data into -# a separate array. -impute!(data::SubArray, imp::DropObs) = impute!(collect(data), imp::DropObs) - function impute!(table, imp::DropObs) imp.context() do c @assert istable(table) @@ -88,6 +93,8 @@ julia> impute(M, DropVars(; context=Context(; limit=0.2)); dims=2) 1×5 Array{Union{Missing, Float64},2}: 1.1 2.2 3.3 missing 5.5 ``` + +WARNING: Variables cannot be removed in-place, so this method will internally perform a copy. """ struct DropVars <: Imputor context::AbstractContext @@ -97,20 +104,11 @@ end DropVars(; context=Context()) = DropVars(context) function impute!(data::AbstractMatrix, imp::DropVars; dims=1) - return filtervars(data; dims=dims) do var - try - imp.context() do c - for x in var - ismissing!(c, x) - end - end - return true - catch e - if isa(e, ImputeError) - return false - else - rethrow(e) - end + # parent(data) === data || return impute!(parent(data), imp) + + imp.context() do c + return filtervars(data; dims=dims) do vars + !ismissing!(c, vars) end end end @@ -119,25 +117,13 @@ function impute!(table, imp::DropVars) istable(table) || throw(MethodError(impute!, (table, imp))) cols = Tables.columns(table) - cnames = Iterators.filter(propertynames(cols)) do cname - try - imp.context() do c - col = getproperty(cols, cname) - for i in eachindex(col) - ismissing!(c, col[i]) - end - end - return true - catch e - if isa(e, ImputeError) - return false - else - rethrow(e) - end + imp.context() do c + cnames = Iterators.filter(propertynames(cols)) do cname + !ismissing!(c, getproperty(cols, cname)) end - end - selected = Tables.select(table, cnames...) - table = materializer(table)(selected) - return table + selected = Tables.select(table, cnames...) + table = materializer(table)(selected) + return table + end end diff --git a/test/runtests.jl b/test/runtests.jl index 581f5d8..f2af10a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,64 +18,142 @@ import Impute: WeightedContext, ImputeError + @testset "Impute" begin + # Defining our missing datasets a = allowmissing(1.0:1.0:20.0) a[[2, 3, 7]] .= missing mask = map(!ismissing, a) ctx = Context(; limit=0.2) - @testset "Equality $T" for T in (DropObs, DropVars, Interpolate, Fill, LOCF, NOCB) - @test T() == T() + # We call collect to not have a wrapper type that references the same data. + m = collect(reshape(a, 5, 4)) + + table = DataFrame( + :sin => allowmissing(sin.(1.0:1.0:20.0)), + :cos => allowmissing(sin.(1.0:1.0:20.0)), + ) + + table.sin[[2, 3, 7, 12, 19]] .= missing + + @testset "Equality" begin + @testset "$T" for T in (DropObs, DropVars, Interpolate, Fill, LOCF, NOCB) + @test T() == T() + end end @testset "Drop" begin @testset "DropObs" begin - result = impute(a, DropObs(; context=ctx)) - expected = copy(a) - deleteat!(expected, [2, 3, 7]) + @testset "Vector" begin + result = impute(a, DropObs(; context=ctx)) + expected = deleteat!(deepcopy(a), [2, 3, 7]) - @test result == expected - @test result == Impute.dropobs(a; context=ctx) + @test result == expected + @test result == Impute.dropobs(a; context=ctx) - a2 = copy(a) - Impute.dropobs!(a2; context=ctx) - @test a2 == expected + a2 = deepcopy(a) + Impute.dropobs!(a2; context=ctx) + @test a2 == expected + end + + @testset "Matrix" begin + # Because we're removing 2 of our 5 rows we need to change the limit. + ctx = Context(; limit=0.4) + result = impute(m, DropObs(; context=ctx)) + expected = m[[1, 4, 5], :] + + @test isequal(result, expected) + @test isequal(result, Impute.dropobs(m; context=ctx)) + @test isequal(collect(result'), Impute.dropobs(collect(m'); dims=2, context=ctx)) + + m_ = Impute.dropobs!(m; context=ctx) + # The mutating test is broken because we need to making a copy of + # the original matrix + @test_broken isequal(m, expected) + @test isequal(m_, expected) + end + + @testset "Tables" begin + ctx = Context(; limit=0.4) + @testset "DataFrame" begin + df = deepcopy(table) + result = impute(df, DropObs(; context=ctx)) + expected = dropmissing(df) + + @test isequal(result, expected) + @test isequal(result, Impute.dropobs(df; context=ctx)) + + df_ = Impute.dropobs!(df; context=ctx) + # The mutating test is broken because we need to making a copy of + # the original table + @test_broken isequal(df, expected) + @test isequal(df_, expected) + end + + @testset "Column Table" begin + coltab = Tables.columntable(table) + + result = impute(coltab, DropObs(; context=ctx)) + expected = Tables.columntable(dropmissing(table)) + + @test isequal(result, expected) + @test isequal(result, Impute.dropobs(coltab; context=ctx)) + + coltab_ = Impute.dropobs!(coltab; context=ctx) + # The mutating test is broken because we need to making a copy of + # the original table + @test_broken isequal(coltab, expected) + @test isequal(coltab_, expected) + end + + @testset "Row Table" begin + rowtab = Tables.rowtable(table) + result = impute(rowtab, DropObs(; context=ctx)) + expected = Tables.rowtable(dropmissing(table)) + + @show result + @show expected + @test isequal(result, expected) + @test isequal(result, Impute.dropobs(rowtab; context=ctx)) + + rowtab_ = Impute.dropobs!(rowtab; context=ctx) + # The mutating test is broken because we need to making a copy of + # the original table + # @test_broken isequal(rowtab, expected) + @test isequal(rowtab_, expected) + end + end end + @testset "DropVars" begin @testset "Vector" begin @test_throws MethodError Impute.dropvars(a) end @testset "Matrix" begin - m = reshape(a, 5, 4) - + ctx = Context(; limit=0.5) result = impute(m, DropVars(; context=ctx)) - expected = copy(m)[:, 2:4] + expected = copy(m)[:, 3:4] @test isequal(result, expected) @test isequal(result, Impute.dropvars(m; context=ctx)) - @test isequal(result', Impute.dropvars(m'; dims=2, context=ctx)) + @test isequal(collect(result'), Impute.dropvars(collect(m'); dims=2, context=ctx)) - Impute.dropvars!(m; context=ctx) + m_ = Impute.dropvars!(m; context=ctx) # The mutating test is broken because we need to making a copy of # the original matrix @test_broken isequal(m, expected) + @test isequal(m_, expected) end @testset "Tables" begin - orig = DataFrame( - :sin => allowmissing(sin.(1.0:1.0:20.0)), - :cos => allowmissing(sin.(1.0:1.0:20.0)), - ) - - orig.sin[[2, 3, 7, 12, 19]] .= missing - orig.cos[[4, 9]] .= missing - @testset "DataFrame" begin - df = deepcopy(orig) + df = deepcopy(table) result = impute(df, DropVars(; context=ctx)) expected = select(df, :cos) + @show result + @show expected @test isequal(result, expected) @test isequal(result, Impute.dropvars(df; context=ctx)) @@ -86,7 +164,7 @@ import Impute: end @testset "Column Table" begin - coltab = Tables.columntable(orig) + coltab = Tables.columntable(table) result = impute(coltab, DropVars(; context=ctx)) expected = Tables.columntable(Tables.select(coltab, :cos)) @@ -101,7 +179,7 @@ import Impute: end @testset "Row Table" begin - rowtab = Tables.rowtable(orig) + rowtab = Tables.rowtable(table) result = impute(rowtab, DropVars(; context=ctx)) expected = Tables.rowtable(Tables.select(rowtab, :cos)) @@ -163,6 +241,20 @@ import Impute: Impute.fill!(a2; context=ctx) @test a2 == result end + + @testset "Matrix" begin + ctx = Context(; limit=1.0) + expected = Matrix(Impute.dropobs(dataset("boot", "neuro"); context=ctx)) + data = Matrix(dataset("boot", "neuro")) + + result = impute(data, Fill(; value=0.0, context=ctx)) + @test size(result) == size(data) + @test result == Impute.fill(data; value=0.0, context=ctx) + + data2 = copy(data) + Impute.fill!(data2; value=0.0, context=ctx) + @test data2 == result + end end @testset "LOCF" begin @@ -195,69 +287,6 @@ import Impute: @test a2 == result end - @testset "DataFrame" begin - ctx = Context(; limit=1.0) - @testset "Single DataFrame" begin - data = dataset("boot", "neuro") - df = impute(data, Interpolate(; context=ctx)) - @test isequal(df, Impute.interp(data; context=ctx)) - end - @testset "GroupedDataFrame" begin - hod = repeat(1:24, 12 * 10) - obj = repeat(1:12, 24 * 10) - n = length(hod) - - df = DataFrame( - :hod => hod, - :obj => obj, - :val => allowmissing( - [sin(x) * cos(y) for (x, y) in zip(hod, obj)] - ), - ) - - df.val[rand(1:n, 20)] .= missing - gdf1 = groupby(deepcopy(df), [:hod, :obj]) - gdf2 = groupby(df, [:hod, :obj]) - - f1 = Impute.interp(; context=ctx) ∘ Impute.locf!() ∘ Impute.nocb!() - f2 = Impute.interp!(; context=ctx) ∘ Impute.locf!() ∘ Impute.nocb!() - - result = vcat(f1.(gdf1)...) - @test df != result - @test size(result) == (24 * 12 * 10, 3) - @test all(!ismissing, Tables.matrix(result)) - - # Test that we can also mutate the dataframe directly - f2.(gdf2) - @test result == sort(df, (:hod, :obj)) - end - end - - @testset "Matrix" begin - ctx = Context(; limit=1.0) - expected = Matrix(Impute.dropobs(dataset("boot", "neuro"); context=ctx)) - data = Matrix(dataset("boot", "neuro")) - - @testset "Drop" begin - result = impute(data, DropObs(; context=ctx)) - @test size(result, 1) == 4 - @test result == Impute.dropobs(data; context=ctx, dims=1) - - @test result == expected - @test Impute.dropobs(data'; dims=2, context=ctx) == expected' - end - - @testset "Fill" begin - result = impute(data, Fill(; value=0.0, context=ctx)) - @test size(result) == size(data) - @test result == Impute.fill(data; value=0.0, context=ctx) - - data2 = copy(data) - Impute.fill!(data2; value=0.0, context=ctx) - @test data2 == result - end - end - @testset "Not enough data" begin ctx = Context(; limit=0.1) @test_throws ImputeError impute(a, DropObs(; context=ctx)) @@ -290,6 +319,36 @@ import Impute: result3 = impute(orig, imp) @test result == result2 @test result == result3 + + @testset "GroupedDataFrame" begin + hod = repeat(1:24, 12 * 10) + obj = repeat(1:12, 24 * 10) + n = length(hod) + + df = DataFrame( + :hod => hod, + :obj => obj, + :val => allowmissing( + [sin(x) * cos(y) for (x, y) in zip(hod, obj)] + ), + ) + + df.val[rand(1:n, 20)] .= missing + gdf1 = groupby(deepcopy(df), [:hod, :obj]) + gdf2 = groupby(df, [:hod, :obj]) + + f1 = Impute.interp(; context=ctx) ∘ Impute.locf!() ∘ Impute.nocb!() + f2 = Impute.interp!(; context=ctx) ∘ Impute.locf!() ∘ Impute.nocb!() + + result = vcat(f1.(gdf1)...) + @test df != result + @test size(result) == (24 * 12 * 10, 3) + @test all(!ismissing, Tables.matrix(result)) + + # Test that we can also mutate the dataframe directly + f2.(gdf2) + @test result == sort(df, (:hod, :obj)) + end end @testset "Column Table" begin From 8c138e5bc8f45096f16b0dcbe0e072995346e4b6 Mon Sep 17 00:00:00 2001 From: rofinn Date: Wed, 31 Jul 2019 11:41:11 -0500 Subject: [PATCH 3/6] Added a few AxisArray tests. --- Project.toml | 6 +++--- test/runtests.jl | 55 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index f23dcfd..08facf1 100644 --- a/Project.toml +++ b/Project.toml @@ -4,9 +4,7 @@ authors = ["Invenia Technical Computing"] version = "0.2.0" [deps] -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" -Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" @@ -19,9 +17,11 @@ Tables = "0.2" julia = "1" [extras] +AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DataFrames", "RDatasets", "Test"] +test = ["AxisArrays", "DataFrames", "Dates", "RDatasets", "Test"] diff --git a/test/runtests.jl b/test/runtests.jl index f2af10a..8528622 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,9 @@ using Impute using Tables using Test +using AxisArrays using DataFrames +using Dates using RDatasets using Statistics using StatsBase @@ -29,6 +31,12 @@ import Impute: # We call collect to not have a wrapper type that references the same data. m = collect(reshape(a, 5, 4)) + aa = AxisArray( + deepcopy(m), + Axis{:time}(DateTime(2017, 6, 5, 5):Hour(1):DateTime(2017, 6, 5, 9)), + Axis{:id}(1:4) + ) + table = DataFrame( :sin => allowmissing(sin.(1.0:1.0:20.0)), :cos => allowmissing(sin.(1.0:1.0:20.0)), @@ -111,8 +119,6 @@ import Impute: result = impute(rowtab, DropObs(; context=ctx)) expected = Tables.rowtable(dropmissing(table)) - @show result - @show expected @test isequal(result, expected) @test isequal(result, Impute.dropobs(rowtab; context=ctx)) @@ -123,6 +129,22 @@ import Impute: @test isequal(rowtab_, expected) end end + + @testset "AxisArray" begin + # Because we're removing 2 of our 5 rows we need to change the limit. + ctx = Context(; limit=0.4) + result = impute(aa, DropObs(; context=ctx)) + expected = aa[[1, 4, 5], :] + + @test isequal(result, expected) + @test isequal(result, Impute.dropobs(aa; context=ctx)) + + aa_ = Impute.dropobs!(aa; context=ctx) + # The mutating test is broken because we need to making a copy of + # the original matrix + @test_broken isequal(aa, expected) + @test isequal(aa_, expected) + end end @testset "DropVars" begin @@ -152,8 +174,6 @@ import Impute: result = impute(df, DropVars(; context=ctx)) expected = select(df, :cos) - @show result - @show expected @test isequal(result, expected) @test isequal(result, Impute.dropvars(df; context=ctx)) @@ -192,6 +212,20 @@ import Impute: @test_broken isequal(rowtab, expected) end end + @testset "AxisArray" begin + ctx = Context(; limit=0.5) + result = impute(aa, DropVars(; context=ctx)) + expected = copy(aa)[:, 3:4] + + @test isequal(result, expected) + @test isequal(result, Impute.dropvars(aa; context=ctx)) + + aa_ = Impute.dropvars!(aa; context=ctx) + # The mutating test is broken because we need to making a copy of + # the original matrix + @test_broken isequal(aa, expected) + @test isequal(aa_, expected) + end end end @@ -383,6 +417,19 @@ import Impute: # Confirm that we don't have any more missing values @test all(!ismissing, result) end + + @testset "AxisArray" begin + data = AxisArray( + Matrix(orig), + Axis{:row}(1:size(orig, 1)), + Axis{:V}(names(orig)), + ) + result = Impute.interp(data; context=ctx) |> Impute.locf!() |> Impute.nocb!() + + @test size(result) == size(data) + # Confirm that we don't have any more missing values + @test all(!ismissing, result) + end end @testset "Alternate missing functions" begin From c7e1d440d3e7c742f1baa9699c84d6187df6b3b6 Mon Sep 17 00:00:00 2001 From: rofinn Date: Wed, 31 Jul 2019 15:39:18 -0500 Subject: [PATCH 4/6] Warn that drop isn't an in-place operation. --- src/imputors/drop.jl | 34 +++++++++++++++++++--------------- src/imputors/fill.jl | 2 +- test/deprecated.jl | 5 +++-- test/runtests.jl | 9 +++++---- 4 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/imputors/drop.jl b/src/imputors/drop.jl index bbd5856..cd053ca 100644 --- a/src/imputors/drop.jl +++ b/src/imputors/drop.jl @@ -22,8 +22,7 @@ julia> impute(M, DropObs(; context=Context(; limit=1.0)); dims=2) 1.1 2.2 5.5 ``` -WARNING: Observations can only be removed in-place for some input data types others -(e.g., matrices, array views, some tables) will require a copy/collect. +WARNING: In-place dropping of observations is not supported. """ struct DropObs <: Imputor context::AbstractContext @@ -32,19 +31,13 @@ end # TODO: Switch to using Base.@kwdef on 1.1 DropObs(; context=Context()) = DropObs(context) -function impute!(data::AbstractVector, imp::DropObs) - parent(data) === data || return impute!(parent(data), imp) - +function impute(data::AbstractVector, imp::DropObs) imp.context() do c - filter!(x -> !ismissing!(c, x), data) + return filter(x -> !ismissing!(c, x), data) end - - return data end -function impute!(data::AbstractMatrix, imp::DropObs; dims=1) - # parent(data) === data || return impute!(parent(data), imp) - +function impute(data::AbstractMatrix, imp::DropObs; dims=1) imp.context() do c return filterobs(data; dims=dims) do obs !ismissing!(c, obs) @@ -52,7 +45,7 @@ function impute!(data::AbstractMatrix, imp::DropObs; dims=1) end end -function impute!(table, imp::DropObs) +function impute(table, imp::DropObs) imp.context() do c @assert istable(table) rows = Tables.rows(table) @@ -94,7 +87,7 @@ julia> impute(M, DropVars(; context=Context(; limit=0.2)); dims=2) 1.1 2.2 3.3 missing 5.5 ``` -WARNING: Variables cannot be removed in-place, so this method will internally perform a copy. +WARNING: In-place dropping of variables is not supported. """ struct DropVars <: Imputor context::AbstractContext @@ -103,7 +96,7 @@ end # TODO: Switch to using Base.@kwdef on 1.1 DropVars(; context=Context()) = DropVars(context) -function impute!(data::AbstractMatrix, imp::DropVars; dims=1) +function impute(data::AbstractMatrix, imp::DropVars; dims=1) # parent(data) === data || return impute!(parent(data), imp) imp.context() do c @@ -113,7 +106,7 @@ function impute!(data::AbstractMatrix, imp::DropVars; dims=1) end end -function impute!(table, imp::DropVars) +function impute(table, imp::DropVars) istable(table) || throw(MethodError(impute!, (table, imp))) cols = Tables.columns(table) @@ -127,3 +120,14 @@ function impute!(table, imp::DropVars) return table end end + +# Add impute! methods to override the default behaviour in imputors.jl +function impute!(data::AbstractMatrix, imp::Union{DropObs, DropVars}) + @warn "In-place dropping of observations is not supported" + return impute(data, imp) +end + +function impute!(data, imp::Union{DropObs, DropVars}) + @warn "In-place dropping of observations is not supported" + return impute(data, imp) +end diff --git a/src/imputors/fill.jl b/src/imputors/fill.jl index 395fb9d..abaefc6 100644 --- a/src/imputors/fill.jl +++ b/src/imputors/fill.jl @@ -38,7 +38,7 @@ function impute!(data::AbstractVector, imp::Fill) imp.context() do c fill_val = if isa(imp.value, Function) # Call `deepcopy` because we can trust that it's available for all types. - imp.value(Impute.drop(deepcopy(data); context=c)) + imp.value(Impute.drop(data; context=c)) else imp.value end diff --git a/test/deprecated.jl b/test/deprecated.jl index 961d86e..59eca7e 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -12,8 +12,9 @@ # Mutating method a2 = copy(a) - Impute.drop!(a2; limit=0.2) - @test a2 == expected + a2_ = Impute.drop!(a2; limit=0.2) + @test_broken a2 == expected + @test a2_ == expected end @testset "Interpolate" begin diff --git a/test/runtests.jl b/test/runtests.jl index 8528622..38ec220 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -60,8 +60,9 @@ import Impute: @test result == Impute.dropobs(a; context=ctx) a2 = deepcopy(a) - Impute.dropobs!(a2; context=ctx) - @test a2 == expected + a2_ = Impute.dropobs!(a2; context=ctx) + @test_broken a2 == expected + @test a2_ == expected end @testset "Matrix" begin @@ -440,8 +441,8 @@ import Impute: @test Impute.dropobs(data1; context=ctx1) == dropmissing(data1) - result1 = Impute.interp(data1; context=ctx1) |> Impute.dropobs!() - result2 = Impute.interp(data2; context=ctx2) |> Impute.dropobs!(; context=ctx2) + result1 = Impute.interp(data1; context=ctx1) |> Impute.dropobs() + result2 = Impute.interp(data2; context=ctx2) |> Impute.dropobs(; context=ctx2) @test result1 == result2 end From 40c4a66e9790c848cc876aee89a07ddba22c1a3b Mon Sep 17 00:00:00 2001 From: rofinn Date: Wed, 31 Jul 2019 17:44:37 -0500 Subject: [PATCH 5/6] Drop warning and special case dropobs! for vectors. --- README.md | 5 ++++- docs/src/index.md | 5 ++++- src/imputors/drop.jl | 26 ++++++++------------------ test/deprecated.jl | 5 ++--- test/runtests.jl | 5 ++--- 5 files changed, 20 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 6943409..414ddcf 100644 --- a/README.md +++ b/README.md @@ -111,4 +111,7 @@ julia> Impute.interp(df) |> Impute.locf() |> Impute.nocb() │ 469 │ -247.6 │ -180.7 │ -70.9 │ 33.7 │ 114.8 │ 222.5 │ ``` -**Warning**: Your approach should depend on the properties of you data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)). +**Warning:** + +- Your approach should depend on the properties of you data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)). +- In-place calls aren't guaranteedto mutate the original data, but it will try avoid copying if possible. diff --git a/docs/src/index.md b/docs/src/index.md index 31a547f..07bc71f 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -52,4 +52,7 @@ Finally, we can chain multiple simple methods together to give a complete datase Impute.interp(df) |> Impute.locf() |> Impute.nocb() ``` -Warning: Your approach should depend on the properties of you data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)). +**Warning:** + +- Your approach should depend on the properties of you data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)). +- In-place calls aren't guaranteedto mutate the original data, but it will try avoid copying if possible. diff --git a/src/imputors/drop.jl b/src/imputors/drop.jl index cd053ca..2b86da6 100644 --- a/src/imputors/drop.jl +++ b/src/imputors/drop.jl @@ -21,8 +21,6 @@ julia> impute(M, DropObs(; context=Context(; limit=1.0)); dims=2) 1.0 2.0 5.0 1.1 2.2 5.5 ``` - -WARNING: In-place dropping of observations is not supported. """ struct DropObs <: Imputor context::AbstractContext @@ -31,10 +29,13 @@ end # TODO: Switch to using Base.@kwdef on 1.1 DropObs(; context=Context()) = DropObs(context) +# Special case impute! for vectors because we know filter! will work +function impute!(data::Vector, imp::DropObs) + imp.context(c -> filter!(x -> !ismissing!(c, x), data)) +end + function impute(data::AbstractVector, imp::DropObs) - imp.context() do c - return filter(x -> !ismissing!(c, x), data) - end + imp.context(c -> filter(x -> !ismissing!(c, x), data)) end function impute(data::AbstractMatrix, imp::DropObs; dims=1) @@ -86,8 +87,6 @@ julia> impute(M, DropVars(; context=Context(; limit=0.2)); dims=2) 1×5 Array{Union{Missing, Float64},2}: 1.1 2.2 3.3 missing 5.5 ``` - -WARNING: In-place dropping of variables is not supported. """ struct DropVars <: Imputor context::AbstractContext @@ -97,8 +96,6 @@ end DropVars(; context=Context()) = DropVars(context) function impute(data::AbstractMatrix, imp::DropVars; dims=1) - # parent(data) === data || return impute!(parent(data), imp) - imp.context() do c return filtervars(data; dims=dims) do vars !ismissing!(c, vars) @@ -122,12 +119,5 @@ function impute(table, imp::DropVars) end # Add impute! methods to override the default behaviour in imputors.jl -function impute!(data::AbstractMatrix, imp::Union{DropObs, DropVars}) - @warn "In-place dropping of observations is not supported" - return impute(data, imp) -end - -function impute!(data, imp::Union{DropObs, DropVars}) - @warn "In-place dropping of observations is not supported" - return impute(data, imp) -end +impute!(data::AbstractMatrix, imp::Union{DropObs, DropVars}) = impute(data, imp) +impute!(data, imp::Union{DropObs, DropVars}) = impute(data, imp) diff --git a/test/deprecated.jl b/test/deprecated.jl index 59eca7e..961d86e 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -12,9 +12,8 @@ # Mutating method a2 = copy(a) - a2_ = Impute.drop!(a2; limit=0.2) - @test_broken a2 == expected - @test a2_ == expected + Impute.drop!(a2; limit=0.2) + @test a2 == expected end @testset "Interpolate" begin diff --git a/test/runtests.jl b/test/runtests.jl index 38ec220..3a134c9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -60,9 +60,8 @@ import Impute: @test result == Impute.dropobs(a; context=ctx) a2 = deepcopy(a) - a2_ = Impute.dropobs!(a2; context=ctx) - @test_broken a2 == expected - @test a2_ == expected + Impute.dropobs!(a2; context=ctx) + @test a2 == expected end @testset "Matrix" begin From 5c45db024c4d0f52ed6cdd6c789947d141dd3b3e Mon Sep 17 00:00:00 2001 From: rofinn Date: Thu, 1 Aug 2019 14:39:51 -0500 Subject: [PATCH 6/6] Add links to issues for table and array mutability traits. --- README.md | 3 +++ docs/src/index.md | 3 +++ 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 414ddcf..c744373 100644 --- a/README.md +++ b/README.md @@ -115,3 +115,6 @@ julia> Impute.interp(df) |> Impute.locf() |> Impute.nocb() - Your approach should depend on the properties of you data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)). - In-place calls aren't guaranteedto mutate the original data, but it will try avoid copying if possible. + In the future, it may be possible to detect whether in-place operations are permitted on an array or table using traits: + - https://github.com/JuliaData/Tables.jl/issues/116 + - https://github.com/JuliaDiffEq/ArrayInterface.jl/issues/22 diff --git a/docs/src/index.md b/docs/src/index.md index 07bc71f..a2ddf2e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -56,3 +56,6 @@ Impute.interp(df) |> Impute.locf() |> Impute.nocb() - Your approach should depend on the properties of you data (e.g., [MCAR, MAR, MNAR](https://en.wikipedia.org/wiki/Missing_data#Types_of_missing_data)). - In-place calls aren't guaranteedto mutate the original data, but it will try avoid copying if possible. + In the future, it may be possible to detect whether in-place operations are permitted on an array or table using traits: + - https://github.com/JuliaData/Tables.jl/issues/116 + - https://github.com/JuliaDiffEq/ArrayInterface.jl/issues/22