Skip to content

Commit

Permalink
wrap outstanding tests in data.jl in testset
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Oct 9, 2019
1 parent 8bec53d commit 1c8ca71
Showing 1 changed file with 82 additions and 91 deletions.
173 changes: 82 additions & 91 deletions test/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -272,101 +272,92 @@ end
@test isequal(m2[!, :A], ["a", "b", "a", missing, "c"])
end

# Inner and outer joins on two key columns where one key value is `missing`.
# The seed is fixed because the expected results below are hard-coded for the
# key columns drawn by `rand`.
Random.seed!(1)
df1 = DataFrame(
    a = rand(Union{Symbol, Missing}[:x,:y], 10),
    b = rand(Union{Symbol, Missing}[:A,:B], 10),
    v1 = Vector{Union{Float64, Missing}}(randn(10))
)

df2 = DataFrame(
    a = Union{Symbol, Missing}[:x,:y][[1,2,1,1,2]],
    b = Union{Symbol, Missing}[:A,:B,:C][[1,1,1,2,3]],
    v2 = Vector{Union{Float64, Missing}}(randn(5))
)
# Put a missing key into the right-hand table.
df2[1,:a] = missing

m1 = join(df1, df2, on = [:a,:b])
@test m1[!, :a] == Union{Missing, Symbol}[:x, :x, :y, :y, :y, :x, :x, :x]
m2 = join(df1, df2, on = [:a,:b], kind = :outer)
@test ismissing(m2[10,:v2])
# The expected column contains `missing`, so compare with `isequal`:
# `==` would evaluate to `missing` instead of `true`.
@test isequal(m2[!, :a], [:x, :x, :y, :y, :y, :x, :x, :y, :x, :y, missing, :y])

Random.seed!(1)
function spltdf(d)
d[!, :x1] = map(x -> x[1], d[!, :a])
d[!, :x2] = map(x -> x[2], d[!, :a])
d[!, :x3] = map(x -> x[3], d[!, :a])
d
# Joins on several key columns: first with a `missing` key value, then with a
# string key that is also split into one column per character.
@testset "join tests" begin
    # Fixed seed: the expected join results below are hard-coded for the key
    # columns drawn by `rand`.
    Random.seed!(1)
    df1 = DataFrame(a = rand(Union{Symbol, Missing}[:x,:y], 10),
                    b = rand(Union{Symbol, Missing}[:A,:B], 10),
                    v1 = Vector{Union{Float64, Missing}}(randn(10)))

    df2 = DataFrame(a = Union{Symbol, Missing}[:x,:y][[1,2,1,1,2]],
                    b = Union{Symbol, Missing}[:A,:B,:C][[1,1,1,2,3]],
                    v2 = Vector{Union{Float64, Missing}}(randn(5)))
    # Put a missing key into the right-hand table.
    df2[1,:a] = missing

    m1 = join(df1, df2, on = [:a,:b])
    @test m1[!, :a] == Union{Missing, Symbol}[:x, :x, :y, :y, :y, :x, :x, :x]
    m2 = join(df1, df2, on = [:a,:b], kind = :outer)
    @test ismissing(m2[10,:v2])
    # The expected column contains `missing`, so compare with `isequal`:
    # `==` would evaluate to `missing` instead of `true`.
    @test isequal(m2[!, :a], [:x, :x, :y, :y, :y, :x, :x, :y, :x, :y, missing, :y])

    Random.seed!(1)
    # Add columns :x1, :x2, :x3 holding the 1st, 2nd and 3rd character of
    # column :a. Mutates `d` and returns it.
    function spltdf(d)
        d[!, :x1] = map(x -> x[1], d[!, :a])
        d[!, :x2] = map(x -> x[2], d[!, :a])
        d[!, :x3] = map(x -> x[3], d[!, :a])
        d
    end
    df1 = DataFrame(a = ["abc", "abx", "axz", "def", "dfr"], v1 = randn(5))
    df2 = DataFrame(a = ["def", "abc","abx", "axz", "xyz"], v2 = randn(5))
    spltdf(df1)
    spltdf(df2)

    # Joining on the whole key and on its three character columns must yield
    # the same :a column.
    m1 = join(df1, df2, on = :a, makeunique=true)
    m2 = join(df1, df2, on = [:x1, :x2, :x3], makeunique=true)
    @test m1[!, :a] == m2[!, :a]
end
# Build both tables and immediately add the per-character key columns.
# `spltdf` hands back the frame it was given, so piping is equivalent to
# reassigning its result.
df1 = DataFrame(a = ["abc", "abx", "axz", "def", "dfr"], v1 = randn(5)) |> spltdf
df2 = DataFrame(a = ["def", "abc","abx", "axz", "xyz"], v2 = randn(5)) |> spltdf

# Join once on the full string key and once on its three character columns.
m1 = join(df1, df2, on = :a, makeunique=true)
m2 = join(df1, df2, on = [:x1, :x2, :x3], makeunique=true)
# Row order is not compared, only the set of matched keys.
@test sort(m1[!, :a]) == sort(m2[!, :a])

# nonunique(), unique() and unique!() with an explicit column selector.
# `df` is `df1` stacked on itself, so rows 7:12 repeat rows 1:6.
df1 = DataFrame(a = Union{String, Missing}["a", "b", "a", "b", "a", "b"],
                b = Vector{Union{Int, Missing}}(1:6),
                c = Union{Int, Missing}[1:3;1:3])
df = vcat(df1, df1)

# nonunique(): selectors under which only the stacked copy is flagged.
@test findall(nonunique(df)) == collect(7:12)
for cols in (:, Colon(), [:a, :c], r"[ac]", Not(2), Not([2]), Not(:b), Not([:b]),
             Not([false, true, false]), [1, 3])
    @test findall(nonunique(df, cols)) == collect(7:12)
end
# Column :a alone has two distinct values, so every row from the third on
# is flagged.
for col in (:a, 1)
    @test findall(nonunique(df, col)) == collect(3:12)
end

# unique(): selectors that recover `df1` exactly.
@test unique(df) == df1
for cols in (:, Colon(), 2:3, [1, 3], [:a, :c], r"[ac]", Not(2), Not([2]), Not(:b),
             Not([:b]), Not([false, true, false]))
    @test unique(df, cols) == df1
end
# Single-column selectors keep only the first row per distinct value.
@test unique(df, 3) == df1[1:3,:]
@test unique(df, :a) == df1[1:2,:]
@test_throws ArgumentError unique(DataFrame())
@test_throws ArgumentError nonunique(DataFrame())

# unique!() de-duplicates in place.
unique!(df, [1, 3])
@test df == df1

for cols in (r"[ac]", Not(:b), Not(2), Not([:b]), Not([2]), Not([false, true, false]))

# Exercises nonunique(), unique() and unique!() with each kind of column
# selector on a frame whose second half repeats its first half.
@testset "nonunique, unique, unique! with extra argument" begin
    df1 = DataFrame(a = Union{String, Missing}["a", "b", "a", "b", "a", "b"],
                    b = Vector{Union{Int, Missing}}(1:6),
                    c = Union{Int, Missing}[1:3;1:3])
    # Rows 7:12 of `df` repeat rows 1:6. `df` must stay un-deduplicated
    # until the unique!() checks at the end of this testset.
    df = vcat(df1, df1)
    @test findall(nonunique(df)) == collect(7:12)
    @test findall(nonunique(df, :)) == collect(7:12)
    @test findall(nonunique(df, Colon())) == collect(7:12)
    @test findall(nonunique(df, :a)) == collect(3:12)
    @test findall(nonunique(df, [:a, :c])) == collect(7:12)
    @test findall(nonunique(df, r"[ac]")) == collect(7:12)
    @test findall(nonunique(df, Not(2))) == collect(7:12)
    @test findall(nonunique(df, Not([2]))) == collect(7:12)
    @test findall(nonunique(df, Not(:b))) == collect(7:12)
    @test findall(nonunique(df, Not([:b]))) == collect(7:12)
    @test findall(nonunique(df, Not([false, true, false]))) == collect(7:12)
    @test findall(nonunique(df, [1, 3])) == collect(7:12)
    @test findall(nonunique(df, 1)) == collect(3:12)

    @test unique(df) == df1
    @test unique(df, :) == df1
    @test unique(df, Colon()) == df1
    @test unique(df, 2:3) == df1
    @test unique(df, 3) == df1[1:3,:]
    @test unique(df, [1, 3]) == df1
    @test unique(df, [:a, :c]) == df1
    @test unique(df, r"[ac]") == df1
    @test unique(df, Not(2)) == df1
    @test unique(df, Not([2])) == df1
    @test unique(df, Not(:b)) == df1
    @test unique(df, Not([:b])) == df1
    @test unique(df, Not([false, true, false])) == df1
    @test unique(df, :a) == df1[1:2,:]
    @test_throws ArgumentError unique(DataFrame())
    @test_throws ArgumentError nonunique(DataFrame())

    # In-place variant: first with integer indices, then with every other
    # selector kind on a freshly stacked frame.
    unique!(df, [1, 3])
    @test df == df1
    for cols in (r"[ac]", Not(:b), Not(2), Not([:b]), Not([2]), Not([false, true, false]))
        df = vcat(df1, df1)
        unique!(df, cols)
        @test df == df1
    end
end

# filter() returns a new frame; filter!() shrinks and returns its argument.
keep_gt1 = r -> r[:x] > 1
kept = DataFrame(x = [3, 2], y = ["b", "a"])
df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"])
@test filter(keep_gt1, df) == kept
@test filter!(keep_gt1, df) === df == kept
# With a `missing` in :x the predicate does not yield a Bool for that row;
# both variants are expected to throw a TypeError.
df = DataFrame(x = [3, 1, 2, 1, missing], y = ["b", "c", "a", "b", "c"])
@test_throws TypeError filter(keep_gt1, df)
@test_throws TypeError filter!(keep_gt1, df)
# Row-wise filtering: the copying and in-place variants must agree, and a
# predicate that hits a `missing` value must raise rather than drop the row.
@testset "filter() and filter!()" begin
    above_one = r -> r[:x] > 1
    expected = DataFrame(x = [3, 2], y = ["b", "a"])

    df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"])
    @test filter(above_one, df) == expected
    # filter!() must return the very same object it was given.
    @test filter!(above_one, df) === df == expected

    df = DataFrame(x = [3, 1, 2, 1, missing], y = ["b", "c", "a", "b", "c"])
    @test_throws TypeError filter(above_one, df)
    @test_throws TypeError filter!(above_one, df)
end

end # module

0 comments on commit 1c8ca71

Please sign in to comment.