From 1f99bbd1886543238af6823cd7e1ab93410f91bc Mon Sep 17 00:00:00 2001 From: rofinn Date: Fri, 5 Jul 2019 17:20:13 -0500 Subject: [PATCH] Updated tests to new API and moved existing deprecated tests to a different file. --- src/Impute.jl | 8 +-- src/context.jl | 16 ----- src/deprecated.jl | 23 +++++++ test/deprecated.jl | 152 +++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 90 ++++++++++++++------------- 5 files changed, 226 insertions(+), 63 deletions(-) create mode 100644 test/deprecated.jl diff --git a/src/Impute.jl b/src/Impute.jl index e8e143c..7c5c062 100644 --- a/src/Impute.jl +++ b/src/Impute.jl @@ -69,10 +69,10 @@ let # NOTE: The @eval begin - $f(data; kwargs...) = impute($typename(; context=Context(Dict(kwargs...))), data) - $f!(data; kwargs...) = impute!($typename(; context=Context(Dict(kwargs...))), data) - $f(; kwargs...) = data -> impute($typename(; context=Context(Dict(kwargs...))), data) - $f!(; kwargs...) = data -> impute!($typename(; context=Context(Dict(kwargs...))), data) + $f(data; kwargs...) = impute($typename(; _extract_context_kwargs(kwargs...)...), data) + $f!(data; kwargs...) = impute!($typename(; _extract_context_kwargs(kwargs...)...), data) + $f(; kwargs...) = data -> impute($typename(; _extract_context_kwargs(kwargs...)...), data) + $f!(; kwargs...) = data -> impute!($typename(; _extract_context_kwargs(kwargs...)...), data) end end end diff --git a/src/context.jl b/src/context.jl index 68b2000..6645174 100644 --- a/src/context.jl +++ b/src/context.jl @@ -108,22 +108,6 @@ function Context(; Context(0, 0, limit, is_missing, on_complete) end -# The constructor only exists for legacy reasons -# We should drop this when we're ready to stop accepting limit in -# arbitrary impute functions. -function Context(d::Dict) - if haskey(d, :context) - return d[:context] - else haskey(d, :limit) - return Context(; - # We using a different default limit value here for legacy reason. - limit=get(d, :limit, 1.0), - is_missing=get(d, :is_missing, ismissing), - on_complete=get(d, :on_complete, complete), - ) - end -end - function (ctx::Context)(f::Function) _ctx = copy(ctx) _ctx.num = 0 diff --git a/src/deprecated.jl b/src/deprecated.jl index 29a6ea0..0060035 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -156,3 +156,26 @@ end # Misc Deprecations # ##################### Base.@deprecate Fill(val; kwargs...) Fill(; value=val, kwargs...) + +# This function is just used to support legacy behaviour and should be removed in a +# future release when we dropping accepting the limit kwarg to impute functions. +function _extract_context_kwargs(kwargs...) + d = Dict(kwargs...) + limit = 1.0 + + if haskey(d, :limit) + warn( + "Passing `limit` directly to impute functions is deprecated. " * + "Please pass a `context` in the future." + ) + + limit = d[:limit] + delete!(d, :limit) + end + + if !haskey(d, :context) + d[:context] = Context(; limit=limit) + end + + return d +end diff --git a/test/deprecated.jl b/test/deprecated.jl new file mode 100644 index 0000000..8c80832 --- /dev/null +++ b/test/deprecated.jl @@ -0,0 +1,152 @@ +@testset "deprecated" begin + a = Vector{Union{Float64, Missing}}(1.0:1.0:20.0) + a[[2, 3, 7]] .= missing + mask = map(!ismissing, a) + + @testset "Drop" begin + result = impute(a, :drop; limit=0.2) + expected = copy(a) + deleteat!(expected, [2, 3, 7]) + + @test result == expected + end + + @testset "Interpolate" begin + result = impute(a, :interp; limit=0.2) + @test result == collect(1.0:1.0:20) + @test result == interp(a) + + # Test interpolation between identical points + b = ones(Union{Float64, Missing}, 20) + b[[2, 3, 7]] .= missing + @test interp(b) == ones(Union{Float64, Missing}, 20) + + # Test interpolation at endpoints + b = ones(Union{Float64, Missing}, 20) + b[[1, 3, 20]] .= missing + result = interp(b) + @test ismissing(result[1]) + @test ismissing(result[20]) + end + + @testset "Fill" begin + @testset "Value" begin + fill_val = -1.0 + result = impute(a, :fill, fill_val; limit=0.2) + expected = copy(a) + expected[[2, 3, 7]] .= fill_val + + @test result == expected + end + + @testset "Mean" begin + result = impute(a, :fill; limit=0.2) + expected = copy(a) + expected[[2, 3, 7]] .= mean(a[mask]) + + @test result == expected + end + end + + @testset "LOCF" begin + result = impute(a, :locf; limit=0.2) + expected = copy(a) + expected[2] = 1.0 + expected[3] = 1.0 + expected[7] = 6.0 + + @test result == expected + end + + @testset "NOCB" begin + result = impute(a, :nocb; limit=0.2) + expected = copy(a) + expected[2] = 4.0 + expected[3] = 4.0 + expected[7] = 8.0 + + @test result == expected + end + + @testset "DataFrame" begin + data = dataset("boot", "neuro") + df = impute(data, :interp; limit=1.0) + end + + @testset "Matrix" begin + data = Matrix(dataset("boot", "neuro")) + + @testset "Drop" begin + result = Iterators.drop(data) + @test size(result, 1) == 4 + end + + @testset "Fill" begin + result = impute(data, :fill, 0.0; limit=1.0) + @test size(result) == size(data) + end + end + + @testset "Not enough data" begin + @test_throws ImputeError impute(a, :drop) + end + + @testset "Chain" begin + orig = dataset("boot", "neuro") + + @testset "DataFrame" begin + result = chain( + orig, + Impute.Interpolate(), + Impute.LOCF(), + Impute.NOCB(); + limit=1.0 + ) + + @test size(result) == size(orig) + # Confirm that we don't have any more missing values + @test !any(ismissing, Matrix(result)) + end + + @testset "Column Table" begin + data = Tables.columntable(orig) + result = chain( + data, + Impute.Interpolate(), + Impute.LOCF(), + Impute.NOCB(); + limit=1.0 + ) |> Tables.matrix + + @test size(result) == size(orig) + # Confirm that we don't have any more missing values + @test !any(ismissing, result) + end + + @testset "Matrix" begin + data = Matrix(orig) + result = chain( + data, + Impute.Interpolate(), + Impute.LOCF(), + Impute.NOCB(); + limit=1.0 + ) + + @test size(result) == size(data) + # Confirm that we don't have any more missing values + @test !any(ismissing, result) + end + end + + @testset "Alternate missing functions" begin + data1 = dataset("boot", "neuro") # Missing values with `missing` + data2 = impute(data1, :fill, NaN; limit=1.0) # Missing values with `NaN` + + @test impute(data1, :drop; limit=1.0) == dropmissing(data1) + + result1 = chain(data1, Impute.Interpolate(), Impute.Drop(); limit=1.0) + result2 = chain(data2, isnan, Impute.Interpolate(), Impute.Drop(); limit=1.0) + @test result1 == result2 + end +end diff --git a/test/runtests.jl b/test/runtests.jl index c1f954a..c5b7f55 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,35 +6,37 @@ using RDatasets using Statistics using StatsBase -import Impute: Drop, Context, WeightedContext +import Impute: Drop, Interpolate, Fill, LOCF, NOCB, Context, WeightedContext, ImputeError @testset "Impute" begin a = Vector{Union{Float64, Missing}}(1.0:1.0:20.0) a[[2, 3, 7]] .= missing mask = map(!ismissing, a) + ctx = Context(; limit=0.2) @testset "Drop" begin - result = impute(a, :drop; limit=0.2) + result = impute(Drop(; context=ctx), a) expected = copy(a) deleteat!(expected, [2, 3, 7]) @test result == expected + @test result == Impute.drop(a; context=ctx) end @testset "Interpolate" begin - result = impute(a, :interp; limit=0.2) + result = impute(Interpolate(; context=ctx), a) @test result == collect(1.0:1.0:20) - @test result == interp(a) + @test result == interp(a; context=ctx) # Test interpolation between identical points b = ones(Union{Float64, Missing}, 20) b[[2, 3, 7]] .= missing - @test interp(b) == ones(Union{Float64, Missing}, 20) + @test interp(b; context=ctx) == ones(Union{Float64, Missing}, 20) # Test interpolation at endpoints b = ones(Union{Float64, Missing}, 20) b[[1, 3, 20]] .= missing - result = interp(b) + result = interp(b; context=ctx) @test ismissing(result[1]) @test ismissing(result[20]) end @@ -42,76 +44,81 @@ import Impute: Drop, Context, WeightedContext @testset "Fill" begin @testset "Value" begin fill_val = -1.0 - result = impute(a, :fill, fill_val; limit=0.2) + result = impute(Fill(; value=fill_val, context=ctx), a) expected = copy(a) expected[[2, 3, 7]] .= fill_val @test result == expected + @test result == Impute.fill(a; value=fill_val, context=ctx) end @testset "Mean" begin - result = impute(a, :fill; limit=0.2) + result = impute(Fill(; value=mean, context=ctx), a) expected = copy(a) expected[[2, 3, 7]] .= mean(a[mask]) @test result == expected + @test result == Impute.fill(a; value=mean, context=ctx) end end @testset "LOCF" begin - result = impute(a, :locf; limit=0.2) + result = impute(LOCF(; context=ctx), a) expected = copy(a) expected[2] = 1.0 expected[3] = 1.0 expected[7] = 6.0 @test result == expected + @test result == Impute.locf(a; context=ctx) end @testset "NOCB" begin - result = impute(a, :nocb; limit=0.2) + result = impute(NOCB(; context=ctx), a) expected = copy(a) expected[2] = 4.0 expected[3] = 4.0 expected[7] = 8.0 @test result == expected + @test result == Impute.nocb(a; context=ctx) end @testset "DataFrame" begin + ctx = Context(; limit=1.0) data = dataset("boot", "neuro") - df = impute(data, :interp; limit=1.0) + df = impute(Interpolate(; context=ctx), data) end @testset "Matrix" begin + ctx = Context(; limit=1.0) data = Matrix(dataset("boot", "neuro")) @testset "Drop" begin - result = Iterators.drop(data) + result = impute(Drop(; context=ctx), data) @test size(result, 1) == 4 + @test result == Impute.drop(data; context=ctx) end @testset "Fill" begin - result = impute(data, :fill, 0.0; limit=1.0) + result = impute(Fill(; value=0.0, context=ctx), data) @test size(result) == size(data) + @test result == Impute.fill(data; value=0.0, context=ctx) end end @testset "Not enough data" begin - @test_throws ImputeError impute(a, :drop) + ctx = Context(; limit=0.1) + @test_throws ImputeError impute(Drop(; context=ctx), a) + @test_throws ImputeError Impute.drop(a; context=ctx) end @testset "Chain" begin orig = dataset("boot", "neuro") + ctx = Context(; limit=1.0) @testset "DataFrame" begin - result = chain( - orig, - Impute.Interpolate(), - Impute.LOCF(), - Impute.NOCB(); - limit=1.0 - ) + result = Impute.interp(orig; context=ctx) |> Impute.locf() |> Impute.nocb() @test size(result) == size(orig) # Confirm that we don't have any more missing values @@ -119,14 +126,11 @@ import Impute: Drop, Context, WeightedContext end @testset "Column Table" begin - data = Tables.columntable(orig) - result = chain( - data, - Impute.Interpolate(), - Impute.LOCF(), - Impute.NOCB(); - limit=1.0 - ) |> Tables.matrix + result = Tables.columntable(orig) |> + Impute.interp(; context=ctx) |> + Impute.locf() |> + Impute.nocb() |> + Tables.matrix @test size(result) == size(orig) # Confirm that we don't have any more missing values @@ -135,13 +139,7 @@ import Impute: Drop, Context, WeightedContext @testset "Matrix" begin data = Matrix(orig) - result = chain( - data, - Impute.Interpolate(), - Impute.LOCF(), - Impute.NOCB(); - limit=1.0 - ) + result = Impute.interp(data; context=ctx) |> Impute.locf() |> Impute.nocb() @test size(result) == size(data) # Confirm that we don't have any more missing values @@ -150,20 +148,24 @@ import Impute: Drop, Context, WeightedContext end @testset "Alternate missing functions" begin - data1 = dataset("boot", "neuro") # Missing values with `missing` - data2 = impute(data1, :fill, NaN; limit=1.0) # Missing values with `NaN` + ctx1 = Context(; limit=1.0) + ctx2 = Context(; limit=1.0, is_missing=isnan) + data1 = dataset("boot", "neuro") # Missing values with `missing` + data2 = Impute.fill(data1; value=NaN, context=ctx1) # Missing values with `NaN` - @test impute(data1, :drop; limit=1.0) == dropmissing(data1) + @test Impute.drop(data1; context=ctx1) == dropmissing(data1) + + result1 = Impute.interp(data1; context=ctx1) |> Impute.drop() + result2 = Impute.interp(data2; context=ctx2) |> Impute.drop(; context=ctx2) - result1 = chain(data1, Impute.Interpolate(), Impute.Drop(); limit=1.0) - result2 = chain(data2, isnan, Impute.Interpolate(), Impute.Drop(); limit=1.0) @test result1 == result2 end @testset "Contexts" begin @testset "Base" begin - @test_throws ImputeError impute(a, :drop; limit=0.1) - @test_throws ImputeError impute(Drop(), Context(; limit=0.1), a) + ctx = Context(; limit=0.1) + @test_throws ImputeError Impute.drop(a; context=ctx) + @test_throws ImputeError impute(Drop(; context=ctx), a) end @testset "Weighted" begin @@ -182,4 +184,6 @@ import Impute: Drop, Context, WeightedContext @test_throws ImputeError impute(Drop(), ctx, a) end end + + include("deprecated.jl") end