From d4dfbb426116b28093eb3cf29c4fd4c09cdbb7b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 1 Apr 2019 20:01:14 +0200 Subject: [PATCH] Make eachcol names argument default to false --- docs/src/lib/types.md | 5 +- src/abstractdataframe/abstractdataframe.jl | 42 +++++------ src/abstractdataframe/iteration.jl | 46 +++++------- src/abstractdataframe/join.jl | 8 +-- src/deprecated.jl | 10 +-- src/groupeddataframe/grouping.jl | 4 +- test/cat.jl | 15 ++-- test/constructors.jl | 7 +- test/dataframe.jl | 27 ++++---- test/dataframerow.jl | 3 +- test/indexing.jl | 10 +-- test/iteration.jl | 30 +++----- test/join.jl | 81 +++++++++++----------- test/subdataframe.jl | 4 +- 14 files changed, 132 insertions(+), 160 deletions(-) diff --git a/docs/src/lib/types.md b/docs/src/lib/types.md index 72646e5834..dc3f747c28 100644 --- a/docs/src/lib/types.md +++ b/docs/src/lib/types.md @@ -42,11 +42,10 @@ Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type serves as an iterator over columns of an `AbstractDataFrame`. The return value can have two concrete types: -* If the `eachcol` function is called with the `names` argument set to `true` (currently the default, - but in the future the default will change to `false`) then it returns a value of the +* If the `eachcol` function is called with the `names` argument set to `true` then it returns a value of the `DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}` type, which is an iterator returning a pair containing the column name and the column vector. -* If the `eachcol` function is called with `names` argument set to `false` then it returns a value of the +* If the `eachcol` function is called with `names` argument set to `false` (the default) then it returns a value of the `DataFrameColumns{<:AbstractDataFrame, AbstractVector}` type, which is an iterator returning the column vector only. diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 91f2a6df91..8089fbc8fc 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -204,7 +204,7 @@ eltypes(df) ``` """ -eltypes(df::AbstractDataFrame) = eltype.(columns(df)) +eltypes(df::AbstractDataFrame) = eltype.(eachcol(df)) Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df)) function Base.size(df::AbstractDataFrame, i::Integer) @@ -244,7 +244,7 @@ that is different than the number of rows present in `df`. """ function Base.similar(df::AbstractDataFrame, rows::Integer = size(df, 1)) rows < 0 && throw(ArgumentError("the number of rows must be non-negative")) - DataFrame(AbstractVector[similar(x, rows) for x in columns(df)], copy(index(df))) + DataFrame(AbstractVector[similar(x, rows) for x in eachcol(df)], copy(index(df))) end ############################################################################## @@ -341,15 +341,15 @@ describe(df::AbstractDataFrame, stats::Union{Symbol, Pair{<:Symbol}}...) **Arguments** * `df` : the `AbstractDataFrame` -* `stats::Union{Symbol, Pair{<:Symbol}}...` : the summary statistics to report. - Arguments can be: - * A symbol from the list `:mean`, `:std`, `:min`, `:q25`, - `:median`, `:q75`, `:max`, `:eltype`, `:nunique`, `:first`, `:last`, and +* `stats::Union{Symbol, Pair{<:Symbol}}...` : the summary statistics to report. + Arguments can be: + * A symbol from the list `:mean`, `:std`, `:min`, `:q25`, + `:median`, `:q75`, `:max`, `:eltype`, `:nunique`, `:first`, `:last`, and `:nmissing`. The default statistics used are `:mean`, `:min`, `:median`, `:max`, `:nunique`, `:nmissing`, and `:eltype`. - * `:all` as the only `Symbol` argument to return all statistics. + * `:all` as the only `Symbol` argument to return all statistics. * A `name => function` pair where `name` is a `Symbol`. This will create - a column of summary statistics with the provided name. + a column of summary statistics with the provided name. **Result** @@ -388,16 +388,16 @@ describe(df, :min, :sum => sum) ``` """ -StatsBase.describe(df::AbstractDataFrame, stats::Union{Symbol, Pair{Symbol}}...) = +StatsBase.describe(df::AbstractDataFrame, stats::Union{Symbol, Pair{Symbol}}...) = _describe(df, collect(stats)) # TODO: un-comment this method definition after the deprecation period of -# the `stats` keyword for `describe`. -# StatsBase.describe(df::AbstractDataFrame) = +# the `stats` keyword for `describe`. +# StatsBase.describe(df::AbstractDataFrame) = # _describe(df, [:mean, :min, :median, :max, :nunique, :nmissing, :eltype]) -function _describe(df::AbstractDataFrame, stats::AbstractVector) - predefined_funs = Symbol[s for s in stats if s isa Symbol] +function _describe(df::AbstractDataFrame, stats::AbstractVector) + predefined_funs = Symbol[s for s in stats if s isa Symbol] allowed_fields = [:mean, :std, :min, :q25, :median, :q75, :max, :nunique, :nmissing, :first, :last, :eltype] @@ -406,8 +406,8 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector) predefined_funs = allowed_fields i = findfirst(s -> s == :all, stats) splice!(stats, i, allowed_fields) # insert in the stats vector to get a good order - elseif :all in predefined_funs - throw(ArgumentError("`:all` must be the only `Symbol` argument.")) + elseif :all in predefined_funs + throw(ArgumentError("`:all` must be the only `Symbol` argument.")) elseif !issubset(predefined_funs, allowed_fields) not_allowed = join(setdiff(predefined_funs, allowed_fields), ", :") allowed_msg = "\nAllowed fields are: :" * join(allowed_fields, ", :") @@ -417,7 +417,7 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector) custom_funs = Pair[s for s in stats if s isa Pair] ordered_names = [s isa Symbol ? s : s[1] for s in stats] - + if !allunique(ordered_names) duplicate_names = unique(ordered_names[nonunique(DataFrame(ordered_names = ordered_names))]) throw(ArgumentError("Duplicate names not allowed. Duplicated value(s) are: :$(join(duplicate_names, ", "))")) @@ -428,7 +428,7 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector) data[:variable] = names(df) # An array of Dicts for summary statistics - column_stats_dicts = map(columns(df)) do col + column_stats_dicts = map(eachcol(df)) do col if eltype(col) >: Missing t = collect(skipmissing(col)) d = get_stats(t, predefined_funs) @@ -438,11 +438,11 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector) get_stats!(d, col, custom_funs) end - if :nmissing in predefined_funs + if :nmissing in predefined_funs d[:nmissing] = eltype(col) >: Missing ? count(ismissing, col) : nothing end - if :first in predefined_funs + if :first in predefined_funs d[:first] = isempty(col) ? nothing : first(col) end @@ -509,7 +509,7 @@ function get_stats(col::AbstractVector, stats::AbstractVector{Symbol}) end function get_stats!(d::Dict, col::AbstractVector, stats::AbstractVector{<:Pair}) - for stat in stats + for stat in stats d[stat[1]] = try stat[2](col) catch end end end @@ -843,7 +843,7 @@ function Base.convert(::Type{Matrix{T}}, df::AbstractDataFrame) where T n, p = size(df) res = Matrix{T}(undef, n, p) idx = 1 - for (name, col) in zip(names(df), columns(df)) + for (name, col) in eachcol(df, true) try copyto!(res, idx, col) catch err diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 889735cafc..d394d0de6a 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -99,13 +99,12 @@ struct DataFrameColumns{T<:AbstractDataFrame, V} <: AbstractVector{V} end """ - eachcol(df::AbstractDataFrame, names::Bool=true) + eachcol(df::AbstractDataFrame, names::Bool=false) Return a `DataFrameColumns` that iterates an `AbstractDataFrame` column by column. -If `names` is equal to `true` (currently the default, in the future the default -will be set to `false`) iteration returns a pair consisting of column name -and column vector. -If `names` is equal to `false` then column vectors are yielded. +If `names` is equal to `false` (the default) iteration returns column vectors. +If `names` is equal to `true` pairs consisting of column name and column vector +are yielded. **Examples** @@ -120,30 +119,30 @@ julia> df = DataFrame(x=1:4, y=11:14) │ 3 │ 3 │ 13 │ │ 4 │ 4 │ 14 │ -julia> collect(eachcol(df, true)) -2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}: - :x => [1, 2, 3, 4] - :y => [11, 12, 13, 14] - -julia> collect(eachcol(df, false)) +julia> collect(eachcol(df)) 2-element Array{AbstractArray{T,1} where T,1}: [1, 2, 3, 4] [11, 12, 13, 14] -julia> sum.(eachcol(df, false)) -2-element Array{Int64,1}: - 10 - 50 - -julia> map(eachcol(df, false)) do col +julia> map(eachcol(df)) do col maximum(col) - minimum(col) end 2-element Array{Int64,1}: 3 3 + +julia> sum.(eachcol(df)) +2-element Array{Int64,1}: + 10 + 50 + +julia> collect(eachcol(df, true)) +2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}: + :x => [1, 2, 3, 4] + :y => [11, 12, 13, 14] ``` """ -@inline function eachcol(df::T, names::Bool) where T<: AbstractDataFrame +@inline function eachcol(df::T, names::Bool=false) where T<: AbstractDataFrame if names DataFrameColumns{T, Pair{Symbol, AbstractVector}}(df) else @@ -151,17 +150,6 @@ julia> map(eachcol(df, false)) do col end end -# TODO: remove this method after deprecation -# and add default argument value above -function eachcol(df::AbstractDataFrame) - Base.depwarn("In the future eachcol will have names argument set to false by default", :eachcol) - eachcol(df, true) -end - -# TODO: remove this method after deprecation -# this is left to make sure we do not forget to properly fix columns calls -columns(df::AbstractDataFrame) = eachcol(df, false) - Base.size(itr::DataFrameColumns) = (size(itr.df, 2),) Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear() diff --git a/src/abstractdataframe/join.jl b/src/abstractdataframe/join.jl index ef46c45560..2509d4f7b6 100644 --- a/src/abstractdataframe/join.jl +++ b/src/abstractdataframe/join.jl @@ -89,13 +89,13 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol, cols = Vector{AbstractVector}(undef, ncleft + ncol(dfr_noon)) # inner and left joins preserve non-missingness of the left frame _similar_left = kind == :inner || kind == :left ? similar : similar_missing - for (i, col) in enumerate(columns(joiner.dfl)) + for (i, col) in enumerate(eachcol(joiner.dfl)) cols[i] = _similar_left(col, nrow) copyto!(cols[i], view(col, all_orig_left_ixs)) end # inner and right joins preserve non-missingness of the right frame _similar_right = kind == :inner || kind == :right ? similar : similar_missing - for (i, col) in enumerate(columns(dfr_noon)) + for (i, col) in enumerate(eachcol(dfr_noon)) cols[i+ncleft] = _similar_right(col, nrow) copyto!(cols[i+ncleft], view(col, all_orig_right_ixs)) permute!(cols[i+ncleft], right_perm) @@ -407,7 +407,7 @@ end function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame; makeunique::Bool=false) r1, r2 = size(df1, 1), size(df2, 1) colindex = merge(index(df1), index(df2), makeunique=makeunique) - cols = Any[[repeat(c, inner=r2) for c in columns(df1)]; - [repeat(c, outer=r1) for c in columns(df2)]] + cols = Any[[repeat(c, inner=r2) for c in eachcol(df1)]; + [repeat(c, outer=r1) for c in eachcol(df2)]] DataFrame(cols, colindex) end diff --git a/src/deprecated.jl b/src/deprecated.jl index 0f0c9627fa..10f4668243 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -10,7 +10,7 @@ import Base: @deprecate import Base: keys, values, insert! @deprecate keys(df::AbstractDataFrame) names(df) -@deprecate values(df::AbstractDataFrame) columns(df) +@deprecate values(df::AbstractDataFrame) eachcol(df) @deprecate insert!(df::DataFrame, df2::AbstractDataFrame) (foreach(col -> df[col] = df2[col], names(df2)); df) @deprecate pool categorical @@ -1325,11 +1325,11 @@ import Base: vcat @deprecate showcols(df::AbstractDataFrame, all::Bool=false, values::Bool=true) describe(df, :eltype, :nmissing, :first, :last) @deprecate showcols(io::IO, df::AbstractDataFrame, all::Bool=false, values::Bool=true) show(io, describe(df, :eltype, :nmissing, :first, :last), all) function StatsBase.describe(df::AbstractDataFrame; stats=nothing) - if stats === nothing - _describe(df, [:mean, :min, :median, - :max, :nunique, :nmissing, + if stats === nothing + _describe(df, [:mean, :min, :median, + :max, :nunique, :nmissing, :eltype]) - elseif stats === :all + elseif stats === :all Base.depwarn("The `stats` keyword argument has been deprecated. Use describe(df, stats...) instead.", :describe) _describe(df, [:mean, :std, :min, :q25, :median, :q75, :max, :nunique, :nmissing, :first, :last, :eltype]) diff --git a/src/groupeddataframe/grouping.jl b/src/groupeddataframe/grouping.jl index da80506740..73584c21aa 100644 --- a/src/groupeddataframe/grouping.jl +++ b/src/groupeddataframe/grouping.jl @@ -683,7 +683,7 @@ function _combine(f::Union{AbstractVector{<:Pair}, Tuple{Vararg{Pair}}, incols = gd.parent[first(p)] else df = gd.parent[collect(first(p))] - incols = NamedTuple{Tuple(names(df))}(columns(df)) + incols = NamedTuple{Tuple(names(df))}(eachcol(df)) end firstres = do_call(fun, gd, incols, 1) idx, outcols, _ = _combine_with_first(wrap(firstres), fun, gd, incols) @@ -715,7 +715,7 @@ function _combine(f::Any, gd::GroupedDataFrame) fun = last(f) elseif f isa Pair df = gd.parent[collect(first(f))] - incols = NamedTuple{Tuple(names(df))}(columns(df)) + incols = NamedTuple{Tuple(names(df))}(eachcol(df)) fun = last(f) else incols = nothing diff --git a/test/cat.jl b/test/cat.jl index 9f4371c9a0..cd576d6f95 100644 --- a/test/cat.jl +++ b/test/cat.jl @@ -1,7 +1,6 @@ module TestCat using Test, Random, DataFrames -using DataFrames: columns const ≅ = isequal # @@ -158,7 +157,7 @@ end @testset "vcat >2 args" begin empty_dfs = [DataFrame(), DataFrame(), DataFrame()] @test vcat(empty_dfs...) == reduce(vcat, empty_dfs) == DataFrame() - + df = DataFrame(x = trues(1), y = falses(1)) dfs = [df, df, df] @test vcat(dfs...) ==reduce(vcat, dfs) == DataFrame(x = trues(3), y = falses(3)) @@ -167,13 +166,13 @@ end @testset "vcat mixed coltypes" begin df = vcat(DataFrame([[1]], [:x]), DataFrame([[1.0]], [:x])) @test df == DataFrame([[1.0, 1.0]], [:x]) - @test typeof.(columns(df)) == [Vector{Float64}] + @test typeof.(eachcol(df)) == [Vector{Float64}] df = vcat(DataFrame([[1]], [:x]), DataFrame([["1"]], [:x])) @test df == DataFrame([[1, "1"]], [:x]) - @test typeof.(columns(df)) == [Vector{Any}] + @test typeof.(eachcol(df)) == [Vector{Any}] df = vcat(DataFrame([Union{Missing, Int}[1]], [:x]), DataFrame([[1]], [:x])) @test df == DataFrame([[1, 1]], [:x]) - @test typeof.(columns(df)) == [Vector{Union{Missing, Int}}] + @test typeof.(eachcol(df)) == [Vector{Union{Missing, Int}}] df = vcat(DataFrame([CategoricalArray([1])], [:x]), DataFrame([[1]], [:x])) @test df == DataFrame([[1, 1]], [:x]) @test df[:x] isa Vector{Int} @@ -188,14 +187,14 @@ end df = vcat(DataFrame([Union{Int, Missing}[1]], [:x]), DataFrame([["1"]], [:x])) @test df == DataFrame([[1, "1"]], [:x]) - @test typeof.(columns(df)) == [Vector{Any}] + @test typeof.(eachcol(df)) == [Vector{Any}] df = vcat(DataFrame([CategoricalArray([1])], [:x]), DataFrame([CategoricalArray(["1"])], [:x])) @test df == DataFrame([[1, "1"]], [:x]) @test df[:x] isa CategoricalVector{Any} df = vcat(DataFrame([trues(1)], [:x]), DataFrame([[false]], [:x])) @test df == DataFrame([[true, false]], [:x]) - @test typeof.(columns(df)) == [Vector{Bool}] + @test typeof.(eachcol(df)) == [Vector{Bool}] end @testset "vcat out of order" begin @@ -213,7 +212,7 @@ end @test vcat(df2, df1, df2) == DataFrame([[2, 4, 6, 7, 8, 9, 2, 4, 6], [8, 10, 12, 4, 5, 6, 8, 10, 12], [14, 16, 18, 1, 2, 3, 14, 16, 18]] ,[:C, :B, :A]) - + @test size(vcat(df1, df1, df1, df2, df2, df2)) == (18, 3) df3 = df1[[1, 3, 2]] res = vcat(df1, df1, df1, df2, df2, df2, df3, df3, df3, df3) diff --git a/test/constructors.jl b/test/constructors.jl index a4dc846fcd..da9d1cd7a9 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -2,7 +2,6 @@ module TestConstructors using Test, DataFrames using DataFrames: Index, _columns, index -using DataFrames: columns const ≅ = isequal # @@ -147,13 +146,13 @@ end @testset "column types" begin df = DataFrame(A = 1:3, B = 2:4, C = 3:5) answer = [Array{Int,1}, Array{Int,1}, Array{Int,1}] - @test map(typeof, columns(df)) == answer + @test map(typeof, eachcol(df)) == answer df[:D] = [4, 5, missing] push!(answer, Vector{Union{Int, Missing}}) - @test map(typeof, columns(df)) == answer + @test map(typeof, eachcol(df)) == answer df[:E] = 'c' push!(answer, Vector{Char}) - @test map(typeof, columns(df)) == answer + @test map(typeof, eachcol(df)) == answer end @testset "categorical constructor" begin diff --git a/test/dataframe.jl b/test/dataframe.jl index 94c04bf62b..c12ce98e2e 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -2,7 +2,6 @@ module TestDataFrame using Dates, DataFrames, LinearAlgebra, Statistics, Random, Test using DataFrames: _columns -using DataFrames: columns const ≅ = isequal const ≇ = !isequal @@ -62,7 +61,7 @@ end c = CategoricalArray{Union{Float64, Missing}}(undef, 2)) # https://github.com/JuliaData/Missings.jl/issues/66 # @test missingdf ≅ similar(df, 2) - @test typeof.(columns(similar(df, 2))) == typeof.(columns(missingdf)) + @test typeof.(eachcol(similar(df, 2))) == typeof.(eachcol(missingdf)) @test size(similar(df, 2)) == size(missingdf) end @@ -73,15 +72,15 @@ end @test haskey(df, 1) @test_throws MethodError haskey(df, 1.5) @test_throws ArgumentError haskey(df, true) - @test get(df, :a, -1) === columns(df)[1] + @test get(df, :a, -1) === eachcol(df)[1] @test get(df, :c, -1) == -1 @test !isempty(df) dfv = view(df, 1:2, 1:2) - @test get(df, :a, -1) === columns(df)[1] + @test get(df, :a, -1) === eachcol(df)[1] @test empty!(df) === df - @test isempty(columns(df)) + @test isempty(eachcol(df)) @test isempty(df) @test isempty(DataFrame(a=[], b=[])) @@ -458,7 +457,7 @@ end # Test that it works on a custom function describe_output.test_std = describe_output.std - # Test that describe works with a Pair and a symbol + # Test that describe works with a Pair and a symbol @test describe_output[[:variable, :mean, :test_std]] ≅ describe(df, :mean, :test_std => std) # Test that describe works with a dataframe with no observations @@ -466,7 +465,7 @@ end @test describe(df, :mean) ≅ DataFrame(variable = [:a, :b, :c], mean = [NaN, nothing, nothing]) - @test_throws ArgumentError describe(df, :mean, :all) + @test_throws ArgumentError describe(df, :mean, :all) end #Check the output of unstack @@ -572,11 +571,11 @@ end df = DataFrame(A = Vector{Union{Int, Missing}}(1:3), B = Vector{Union{Int, Missing}}(4:6)) DRT = CategoricalArrays.DefaultRefType -@test all(c -> isa(c, Vector{Union{Int, Missing}}), columns(categorical!(deepcopy(df)))) +@test all(c -> isa(c, Vector{Union{Int, Missing}}), eachcol(categorical!(deepcopy(df)))) @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - columns(categorical!(deepcopy(df), [1,2]))) + eachcol(categorical!(deepcopy(df), [1,2]))) @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - columns(categorical!(deepcopy(df), [:A,:B]))) + eachcol(categorical!(deepcopy(df), [:A,:B]))) @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, _columns(categorical!(deepcopy(df), [:A]))) == 1 @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, @@ -619,7 +618,7 @@ end Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7], Union{Int, Missing}[4, 8]], [:id, :a, :b, :c, :d]) @test isa(udf[1], Vector{Int}) - @test all(isa.(columns(udf)[2:end], Vector{Union{Int, Missing}})) + @test all(isa.(eachcol(udf)[2:end], Vector{Union{Int, Missing}})) df = DataFrame([categorical(repeat(1:2, inner=4)), categorical(repeat('a':'d', outer=2)), categorical(1:8)], [:id, :variable, :value]) @@ -629,7 +628,7 @@ end Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7], Union{Int, Missing}[4, 8]], [:id, :a, :b, :c, :d]) @test isa(udf[1], CategoricalVector{Int}) - @test all(isa.(columns(udf)[2:end], CategoricalVector{Union{Int, Missing}})) + @test all(isa.(eachcol(udf)[2:end], CategoricalVector{Union{Int, Missing}})) end @testset "duplicate entries in unstack warnings" begin @@ -771,14 +770,14 @@ end df = DataFrame([CategoricalArray(1:10), CategoricalArray(string.('a':'j'))]) allowmissing!(df) - @test all(x->x <: CategoricalVector, typeof.(columns(df))) + @test all(x->x <: CategoricalVector, typeof.(eachcol(df))) @test eltypes(df)[1] <: Union{CategoricalValue{Int}, Missing} @test eltypes(df)[2] <: Union{CategoricalString, Missing} df[1,2] = missing @test_throws MissingException disallowmissing!(df) df[1,2] = "a" disallowmissing!(df) - @test all(x->x <: CategoricalVector, typeof.(columns(df))) + @test all(x->x <: CategoricalVector, typeof.(eachcol(df))) @test eltypes(df)[1] <: CategoricalValue{Int} @test eltypes(df)[2] <: CategoricalString end diff --git a/test/dataframerow.jl b/test/dataframerow.jl index 04d36bb69c..03341e0dd4 100644 --- a/test/dataframerow.jl +++ b/test/dataframerow.jl @@ -1,7 +1,6 @@ module TestDataFrameRow using Test, DataFrames, Random -using DataFrames: columns ref_df = DataFrame(a=Union{Int, Missing}[1, 2, 3, 1, 2, 2], b=[2.0, missing, 1.2, 2.0, missing, missing], @@ -136,7 +135,7 @@ end @test hash(DataFrameRow(df, 2, :)) != hash(DataFrameRow(df, 6, :)) # check that hashrows() function generates the same hashes as DataFrameRow - df_rowhashes, _ = DataFrames.hashrows(Tuple(columns(df)), false) + df_rowhashes, _ = DataFrames.hashrows(Tuple(eachcol(df)), false) @test df_rowhashes == [hash(dr) for dr in eachrow(df)] end diff --git a/test/indexing.jl b/test/indexing.jl index 392ff7d1e2..9ed1af2fad 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -6,12 +6,12 @@ using Test, DataFrames df = DataFrame(a=1:3, b=4:6, c=7:9) @test df[1] == [1, 2, 3] - @test df[1] === eachcol(df, false)[1] + @test df[1] === eachcol(df)[1] @test df[1:2] == DataFrame(a=1:3, b=4:6) - @test eachcol(df[1:2], false)[1] === eachcol(df, false)[1] + @test eachcol(df[1:2])[1] === eachcol(df)[1] === last(eachcol(df, true)[1]) @test df[:] == df @test df[:] !== df - @test eachcol(df[:], false)[1] === eachcol(df, false)[1] + @test eachcol(df[:])[1] === eachcol(df)[1] === last(eachcol(df, true)[1]) @test df[1, 1] == 1 @test df[1, 1:2] isa DataFrameRow @@ -27,9 +27,9 @@ using Test, DataFrames @test df[:, 1] == [1, 2, 3] @test df[:, 1] !== df[1] @test df[:, 1:2] == DataFrame(a=1:3, b=4:6) - @test eachcol(df[:, 1:2], false)[1] !== df[1] + @test eachcol(df[:, 1:2])[1] !== df[1] @test df[:, :] == df - @test eachcol(df[:, :], false)[1] !== df[1] + @test eachcol(df[:, :])[1] !== df[1] end @testset "view DataFrame" begin diff --git a/test/iteration.jl b/test/iteration.jl index 2f502672b0..fc1ca96af7 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -1,11 +1,12 @@ module TestIteration using Test, DataFrames -using DataFrames: columns df = DataFrame(A = Vector{Union{Int, Missing}}(1:2), B = Vector{Union{Int, Missing}}(2:3)) @test size(eachrow(df)) == (size(df, 1),) +@test sprint(summary, eachrow(df)) == "2-element DataFrameRows" +@test Base.IndexStyle(eachrow(df)) == IndexLinear() @test eachrow(df)[1] == DataFrameRow(df, 1, :) @test collect(eachrow(df)) isa Vector{<:DataFrameRow} @test eltype(eachrow(df)) <: DataFrameRow @@ -16,38 +17,29 @@ for row in eachrow(df) @test collect(pairs(row)) isa Vector{Pair{Symbol, Int}} end -# TODO - clean up redundant tests after eachcol deprecation @test size(eachcol(df)) == (size(df, 2),) +@test Base.IndexStyle(eachcol(df)) == IndexLinear() @test size(eachcol(df, true)) == (size(df, 2),) -@test size(columns(df)) == (size(df, 2),) @test size(eachcol(df, false)) == (size(df, 2),) @test length(eachcol(df)) == size(df, 2) @test length(eachcol(df, true)) == size(df, 2) -@test length(columns(df)) == size(df, 2) @test length(eachcol(df, false)) == size(df, 2) -@test eachcol(df)[1] == (:A => df[1]) # this will be df[1] after eachcol deprecation +@test eachcol(df)[1] == df[1] @test eachcol(df, true)[1] == (:A => df[1]) -@test columns(df)[1] == df[1] @test eachcol(df, false)[1] == df[1] -@test collect(eachcol(df)) isa Vector{Pair{Symbol, AbstractVector}} @test collect(eachcol(df, true)) isa Vector{Pair{Symbol, AbstractVector}} -@test collect(eachcol(df)) == [:A => [1, 2], :B => [2, 3]] @test collect(eachcol(df, true)) == [:A => [1, 2], :B => [2, 3]] -@test collect(columns(df)) isa Vector{AbstractVector} +@test collect(eachcol(df)) isa Vector{AbstractVector} +@test collect(eachcol(df)) == [[1, 2], [2, 3]] @test collect(eachcol(df, false)) isa Vector{AbstractVector} -@test collect(columns(df)) == [[1, 2], [2, 3]] @test collect(eachcol(df, false)) == [[1, 2], [2, 3]] -@test eltype(eachcol(df)) == Pair{Symbol, AbstractVector} @test eltype(eachcol(df, true)) == Pair{Symbol, AbstractVector} -@test eltype(columns(df)) == AbstractVector @test eltype(eachcol(df, false)) == AbstractVector -for col in eachcol(df) - @test typeof(col) <: Pair{Symbol, <:AbstractVector} -end +@test eltype(eachcol(df)) == AbstractVector for col in eachcol(df, true) @test typeof(col) <: Pair{Symbol, <:AbstractVector} end -for col in columns(df) +for col in eachcol(df) @test isa(col, AbstractVector) end for col in eachcol(df, false) @@ -57,15 +49,13 @@ end @test map(x -> minimum(convert(Array, x)), eachrow(df)) == [1,2] @test map(Vector, eachrow(df)) == [[1, 2], [2, 3]] @test mapcols(minimum, df) == DataFrame(A = [1], B = [2]) -@test map(minimum, eachcol(df)) == DataFrame(A = [1], B = [2]) # this is deprecated @test map(minimum, eachcol(df, true)) == DataFrame(A = [1], B = [2]) # this is deprecated -@test map(minimum, columns(df)) == [1, 2] @test map(minimum, eachcol(df, false)) == [1, 2] +@test map(minimum, eachcol(df)) == [1, 2] @test eltypes(mapcols(Vector{Float64}, df)) == [Float64, Float64] -@test eltypes(map(Vector{Float64}, eachcol(df))) == [Float64, Float64] # this is deprecated @test eltypes(map(Vector{Float64}, eachcol(df, true))) == [Float64, Float64] # this is deprecated -@test eltype(map(Vector{Float64}, columns(df))) == Vector{Float64} @test eltype(map(Vector{Float64}, eachcol(df, false))) == Vector{Float64} +@test eltype(map(Vector{Float64}, eachcol(df))) == Vector{Float64} # test mapcols corner cases # this behavior might change when we rework setindex! to follow standard broadcasting rules diff --git a/test/join.jl b/test/join.jl index df42bfe21b..ccb17a4630 100644 --- a/test/join.jl +++ b/test/join.jl @@ -2,7 +2,6 @@ module TestJoin using Test, DataFrames using DataFrames: similar_missing -using DataFrames: columns const ≅ = isequal name = DataFrame(ID = Union{Int, Missing}[1, 2, 3], @@ -171,7 +170,7 @@ end repeat([0, 1, 2, 3, 4], outer = 3), repeat([0, 1, 2, 3, 4], outer = 3)], [:id, :fid, :id_1, :fid_1]) - @test typeof.(columns(join(df1, df2, kind=:cross, makeunique=true))) == + @test typeof.(eachcol(join(df1, df2, kind=:cross, makeunique=true))) == [Vector{Int}, Vector{Float64}, Vector{Int}, Vector{Float64}] i(on) = join(df1, df2, on = on, kind = :inner, makeunique=true) @@ -184,63 +183,63 @@ end @test s(:id) == s(:fid) == s([:id, :fid]) == DataFrame([[1, 3], [1, 3]], [:id, :fid]) - @test typeof.(columns(s(:id))) == - typeof.(columns(s(:fid))) == - typeof.(columns(s([:id, :fid]))) == [Vector{Int}, Vector{Float64}] + @test typeof.(eachcol(s(:id))) == + typeof.(eachcol(s(:fid))) == + typeof.(eachcol(s([:id, :fid]))) == [Vector{Int}, Vector{Float64}] @test a(:id) == a(:fid) == a([:id, :fid]) == DataFrame([[5], [5]], [:id, :fid]) - @test typeof.(columns(a(:id))) == - typeof.(columns(a(:fid))) == - typeof.(columns(a([:id, :fid]))) == [Vector{Int}, Vector{Float64}] + @test typeof.(eachcol(a(:id))) == + typeof.(eachcol(a(:fid))) == + typeof.(eachcol(a([:id, :fid]))) == [Vector{Int}, Vector{Float64}] on = :id @test i(on) == DataFrame([[1, 3], [1, 3], [1, 3]], [:id, :fid, :fid_1]) - @test typeof.(columns(i(on))) == [Vector{Int}, Vector{Float64}, Vector{Float64}] + @test typeof.(eachcol(i(on))) == [Vector{Int}, Vector{Float64}, Vector{Float64}] @test l(on) ≅ DataFrame(id = [1, 3, 5], fid = [1, 3, 5], fid_1 = [1, 3, missing]) - @test typeof.(columns(l(on))) == + @test typeof.(eachcol(l(on))) == [Vector{Int}, Vector{Float64}, Vector{Union{Float64, Missing}}] @test r(on) ≅ DataFrame(id = [1, 3, 0, 2, 4], fid = [1, 3, missing, missing, missing], fid_1 = [1, 3, 0, 2, 4]) - @test typeof.(columns(r(on))) == + @test typeof.(eachcol(r(on))) == [Vector{Int}, Vector{Union{Float64, Missing}}, Vector{Float64}] @test o(on) ≅ DataFrame(id = [1, 3, 5, 0, 2, 4], fid = [1, 3, 5, missing, missing, missing], fid_1 = [1, 3, missing, 0, 2, 4]) - @test typeof.(columns(o(on))) == + @test typeof.(eachcol(o(on))) == [Vector{Int}, Vector{Union{Float64, Missing}}, Vector{Union{Float64, Missing}}] on = :fid @test i(on) == DataFrame([[1, 3], [1.0, 3.0], [1, 3]], [:id, :fid, :id_1]) - @test typeof.(columns(i(on))) == [Vector{Int}, Vector{Float64}, Vector{Int}] + @test typeof.(eachcol(i(on))) == [Vector{Int}, Vector{Float64}, Vector{Int}] @test l(on) ≅ DataFrame(id = [1, 3, 5], fid = [1, 3, 5], id_1 = [1, 3, missing]) - @test typeof.(columns(l(on))) == [Vector{Int}, Vector{Float64}, + @test typeof.(eachcol(l(on))) == [Vector{Int}, Vector{Float64}, Vector{Union{Int, Missing}}] @test r(on) ≅ DataFrame(id = [1, 3, missing, missing, missing], fid = [1, 3, 0, 2, 4], id_1 = [1, 3, 0, 2, 4]) - @test typeof.(columns(r(on))) == [Vector{Union{Int, Missing}}, Vector{Float64}, + @test typeof.(eachcol(r(on))) == [Vector{Union{Int, Missing}}, Vector{Float64}, Vector{Int}] @test o(on) ≅ DataFrame(id = [1, 3, 5, missing, missing, missing], fid = [1, 3, 5, 0, 2, 4], id_1 = [1, 3, missing, 0, 2, 4]) - @test typeof.(columns(o(on))) == [Vector{Union{Int, Missing}}, Vector{Float64}, + @test typeof.(eachcol(o(on))) == [Vector{Union{Int, Missing}}, Vector{Float64}, Vector{Union{Int, Missing}}] on = [:id, :fid] @test i(on) == DataFrame([[1, 3], [1, 3]], [:id, :fid]) - @test typeof.(columns(i(on))) == [Vector{Int}, Vector{Float64}] + @test typeof.(eachcol(i(on))) == [Vector{Int}, Vector{Float64}] @test l(on) == DataFrame(id = [1, 3, 5], fid = [1, 3, 5]) - @test typeof.(columns(l(on))) == [Vector{Int}, Vector{Float64}] + @test typeof.(eachcol(l(on))) == [Vector{Int}, Vector{Float64}] @test r(on) == DataFrame(id = [1, 3, 0, 2, 4], fid = [1, 3, 0, 2, 4]) - @test typeof.(columns(r(on))) == [Vector{Int}, Vector{Float64}] + @test typeof.(eachcol(r(on))) == [Vector{Int}, Vector{Float64}] @test o(on) == DataFrame(id = [1, 3, 5, 0, 2, 4], fid = [1, 3, 5, 0, 2, 4]) - @test typeof.(columns(o(on))) == [Vector{Int}, Vector{Float64}] + @test typeof.(eachcol(o(on))) == [Vector{Int}, Vector{Float64}] end @testset "all joins with CategoricalArrays" begin @@ -255,7 +254,7 @@ end repeat([0, 1, 2, 3, 4], outer = 3), repeat([0, 1, 2, 3, 4], outer = 3)], [:id, :fid, :id_1, :fid_1]) - @test all(isa.(columns(join(df1, df2, kind=:cross, makeunique=true)), + @test all(isa.(eachcol(join(df1, df2, kind=:cross, makeunique=true)), [CategoricalVector{T} for T in (Int, Float64, Int, Float64)])) i(on) = join(df1, df2, on = on, kind = :inner, makeunique=true) @@ -268,76 +267,76 @@ end @test s(:id) == s(:fid) == s([:id, :fid]) == DataFrame([[1, 3], [1, 3]], [:id, :fid]) - @test typeof.(columns(s(:id))) == - typeof.(columns(s(:fid))) == - typeof.(columns(s([:id, :fid]))) - @test all(isa.(columns(s(:id)), + @test typeof.(eachcol(s(:id))) == + typeof.(eachcol(s(:fid))) == + typeof.(eachcol(s([:id, :fid]))) + @test all(isa.(eachcol(s(:id)), [CategoricalVector{T} for T in (Int, Float64)])) @test a(:id) == a(:fid) == a([:id, :fid]) == DataFrame([[5], [5]], [:id, :fid]) - @test typeof.(columns(a(:id))) == - typeof.(columns(a(:fid))) == - typeof.(columns(a([:id, :fid]))) - @test all(isa.(columns(a(:id)), + @test typeof.(eachcol(a(:id))) == + typeof.(eachcol(a(:fid))) == + typeof.(eachcol(a([:id, :fid]))) + @test all(isa.(eachcol(a(:id)), [CategoricalVector{T} for T in (Int, Float64)])) on = :id @test i(on) == DataFrame([[1, 3], [1, 3], [1, 3]], [:id, :fid, :fid_1]) - @test all(isa.(columns(i(on)), + @test all(isa.(eachcol(i(on)), [CategoricalVector{T} for T in (Int, Float64, Float64)])) @test l(on) ≅ DataFrame(id = [1, 3, 5], fid = [1, 3, 5], fid_1 = [1, 3, missing]) - @test all(isa.(columns(l(on)), + @test all(isa.(eachcol(l(on)), [CategoricalVector{T} for T in (Int,Float64,Union{Float64, Missing})])) @test r(on) ≅ DataFrame(id = [1, 3, 0, 2, 4], fid = [1, 3, missing, missing, missing], fid_1 = [1, 3, 0, 2, 4]) - @test all(isa.(columns(r(on)), + @test all(isa.(eachcol(r(on)), [CategoricalVector{T} for T in (Int,Union{Float64, Missing},Float64)])) @test o(on) ≅ DataFrame(id = [1, 3, 5, 0, 2, 4], fid = [1, 3, 5, missing, missing, missing], fid_1 = [1, 3, missing, 0, 2, 4]) - @test all(isa.(columns(o(on)), + @test all(isa.(eachcol(o(on)), [CategoricalVector{T} for T in (Int,Union{Float64,Missing},Union{Float64, Missing})])) on = :fid @test i(on) == DataFrame([[1, 3], [1.0, 3.0], [1, 3]], [:id, :fid, :id_1]) - @test all(isa.(columns(i(on)), + @test all(isa.(eachcol(i(on)), [CategoricalVector{T} for T in (Int, Float64, Int)])) @test l(on) ≅ DataFrame(id = [1, 3, 5], fid = [1, 3, 5], id_1 = [1, 3, missing]) - @test all(isa.(columns(l(on)), + @test all(isa.(eachcol(l(on)), [CategoricalVector{T} for T in (Int, Float64, Union{Int, Missing})])) @test r(on) ≅ DataFrame(id = [1, 3, missing, missing, missing], fid = [1, 3, 0, 2, 4], id_1 = [1, 3, 0, 2, 4]) - @test all(isa.(columns(r(on)), + @test all(isa.(eachcol(r(on)), [CategoricalVector{T} for T in (Union{Int, Missing}, Float64, Int)])) @test o(on) ≅ DataFrame(id = [1, 3, 5, missing, missing, missing], fid = [1, 3, 5, 0, 2, 4], id_1 = [1, 3, missing, 0, 2, 4]) - @test all(isa.(columns(o(on)), + @test all(isa.(eachcol(o(on)), [CategoricalVector{T} for T in (Union{Int, Missing}, Float64, Union{Int, Missing})])) on = [:id, :fid] @test i(on) == DataFrame([[1, 3], [1, 3]], [:id, :fid]) - @test all(isa.(columns(i(on)), + @test all(isa.(eachcol(i(on)), [CategoricalVector{T} for T in (Int, Float64)])) @test l(on) == DataFrame(id = [1, 3, 5], fid = [1, 3, 5]) - @test all(isa.(columns(l(on)), + @test all(isa.(eachcol(l(on)), [CategoricalVector{T} for T in (Int, Float64)])) @test r(on) == DataFrame(id = [1, 3, 0, 2, 4], fid = [1, 3, 0, 2, 4]) - @test all(isa.(columns(r(on)), + @test all(isa.(eachcol(r(on)), [CategoricalVector{T} for T in (Int, Float64)])) @test o(on) == DataFrame(id = [1, 3, 5, 0, 2, 4], fid = [1, 3, 5, 0, 2, 4]) - @test all(isa.(columns(o(on)), + @test all(isa.(eachcol(o(on)), [CategoricalVector{T} for T in (Int, Float64)])) end diff --git a/test/subdataframe.jl b/test/subdataframe.jl index d56de0e2b4..1a0aadb491 100644 --- a/test/subdataframe.jl +++ b/test/subdataframe.jl @@ -204,11 +204,11 @@ end df2 = DataFrame(sdf) @test df2 isa DataFrame @test df2 == df[[3,1,4], [3,2,1]] - @test all(x -> x isa Vector{Int}, eachcol(df2, false)) + @test all(x -> x isa Vector{Int}, eachcol(df2)) df2 = convert(DataFrame, sdf) @test df2 isa DataFrame @test df2 == df[[3,1,4], [3,2,1]] - @test all(x -> x isa Vector{Int}, eachcol(df2, false)) + @test all(x -> x isa Vector{Int}, eachcol(df2)) end end # module