Make eachcol names argument default to false

JuliaData · Apr 1, 2019 · d4dfbb4 · d4dfbb4
1 parent 2b4e64d
commit d4dfbb4
Show file tree

Hide file tree

Showing 14 changed files with 132 additions and 160 deletions.
diff --git a/docs/src/lib/types.md b/docs/src/lib/types.md
@@ -42,11 +42,10 @@ Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type
 serves as an iterator over columns of an `AbstractDataFrame`.
 The return value can have two concrete types:
 
-* If the `eachcol` function is called with the `names` argument set to `true` (currently the default,
-  but in the future the default will change to `false`) then it returns a value of the
+* If the `eachcol` function is called with the `names` argument set to `true` then it returns a value of the
   `DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}` type, which is an
   iterator returning a pair containing the column name and the column vector.
-* If the `eachcol` function is called with `names` argument set to `false` then it returns a value of the
+* If the `eachcol` function is called with the `names` argument set to `false` (the default) then it returns a value of the
   `DataFrameColumns{<:AbstractDataFrame, AbstractVector}` type, which is an
   iterator returning the column vector only.
 

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -204,7 +204,7 @@ eltypes(df)
 ```
 
 """
-eltypes(df::AbstractDataFrame) = eltype.(columns(df))
+eltypes(df::AbstractDataFrame) = eltype.(eachcol(df))
 
 Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df))
 function Base.size(df::AbstractDataFrame, i::Integer)
@@ -244,7 +244,7 @@ that is different than the number of rows present in `df`.
 """
 function Base.similar(df::AbstractDataFrame, rows::Integer = size(df, 1))
     rows < 0 && throw(ArgumentError("the number of rows must be non-negative"))
-    DataFrame(AbstractVector[similar(x, rows) for x in columns(df)], copy(index(df)))
+    DataFrame(AbstractVector[similar(x, rows) for x in eachcol(df)], copy(index(df)))
 end
 
 ##############################################################################
@@ -341,15 +341,15 @@ describe(df::AbstractDataFrame, stats::Union{Symbol, Pair{<:Symbol}}...)
 **Arguments**
 
 * `df` : the `AbstractDataFrame`
-* `stats::Union{Symbol, Pair{<:Symbol}}...` : the summary statistics to report. 
-  Arguments can be: 
-    *  A symbol from the list `:mean`, `:std`, `:min`, `:q25`, 
-      `:median`, `:q75`, `:max`, `:eltype`, `:nunique`, `:first`, `:last`, and 
+* `stats::Union{Symbol, Pair{<:Symbol}}...` : the summary statistics to report.
+  Arguments can be:
+    *  A symbol from the list `:mean`, `:std`, `:min`, `:q25`,
+      `:median`, `:q75`, `:max`, `:eltype`, `:nunique`, `:first`, `:last`, and
       `:nmissing`. The default statistics used
       are `:mean`, `:min`, `:median`, `:max`, `:nunique`, `:nmissing`, and `:eltype`.
-    * `:all` as the only `Symbol` argument to return all statistics. 
+    * `:all` as the only `Symbol` argument to return all statistics.
     * A `name => function` pair where `name` is a `Symbol`. This will create
-      a column of summary statistics with the provided name. 
+      a column of summary statistics with the provided name.
 
 **Result**
 
@@ -388,16 +388,16 @@ describe(df, :min, :sum => sum)
 ```
 
 """
-StatsBase.describe(df::AbstractDataFrame, stats::Union{Symbol, Pair{Symbol}}...) = 
+StatsBase.describe(df::AbstractDataFrame, stats::Union{Symbol, Pair{Symbol}}...) =
     _describe(df, collect(stats))
 
 # TODO: un-comment this method definition after the deprecation period of
-# the `stats` keyword for `describe`. 
-# StatsBase.describe(df::AbstractDataFrame) = 
+# the `stats` keyword for `describe`.
+# StatsBase.describe(df::AbstractDataFrame) =
 #     _describe(df, [:mean, :min, :median, :max, :nunique, :nmissing, :eltype])
 
-function _describe(df::AbstractDataFrame, stats::AbstractVector)   
-    predefined_funs = Symbol[s for s in stats if s isa Symbol] 
+function _describe(df::AbstractDataFrame, stats::AbstractVector)
+    predefined_funs = Symbol[s for s in stats if s isa Symbol]
 
     allowed_fields = [:mean, :std, :min, :q25, :median, :q75,
                       :max, :nunique, :nmissing, :first, :last, :eltype]
@@ -406,8 +406,8 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
         predefined_funs = allowed_fields
         i = findfirst(s -> s == :all, stats)
         splice!(stats, i, allowed_fields) # insert in the stats vector to get a good order
-    elseif :all in predefined_funs 
-        throw(ArgumentError("`:all` must be the only `Symbol` argument.")) 
+    elseif :all in predefined_funs
+        throw(ArgumentError("`:all` must be the only `Symbol` argument."))
     elseif !issubset(predefined_funs, allowed_fields)
         not_allowed = join(setdiff(predefined_funs, allowed_fields), ", :")
         allowed_msg = "\nAllowed fields are: :" * join(allowed_fields, ", :")
@@ -417,7 +417,7 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
     custom_funs = Pair[s for s in stats if s isa Pair]
 
     ordered_names = [s isa Symbol ? s : s[1] for s in stats]
-    
+
     if !allunique(ordered_names)
         duplicate_names = unique(ordered_names[nonunique(DataFrame(ordered_names = ordered_names))])
         throw(ArgumentError("Duplicate names not allowed. Duplicated value(s) are: :$(join(duplicate_names, ", "))"))
@@ -428,7 +428,7 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
     data[:variable] = names(df)
 
     # An array of Dicts for summary statistics
-    column_stats_dicts = map(columns(df)) do col
+    column_stats_dicts = map(eachcol(df)) do col
         if eltype(col) >: Missing
             t = collect(skipmissing(col))
             d = get_stats(t, predefined_funs)
@@ -438,11 +438,11 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
             get_stats!(d, col, custom_funs)
         end
 
-        if :nmissing in predefined_funs 
+        if :nmissing in predefined_funs
             d[:nmissing] = eltype(col) >: Missing ? count(ismissing, col) : nothing
         end
 
-        if :first in predefined_funs 
+        if :first in predefined_funs
             d[:first] = isempty(col) ? nothing : first(col)
         end
 
@@ -509,7 +509,7 @@ function get_stats(col::AbstractVector, stats::AbstractVector{Symbol})
 end
 
 function get_stats!(d::Dict, col::AbstractVector, stats::AbstractVector{<:Pair})
-    for stat in stats 
+    for stat in stats
         d[stat[1]] = try stat[2](col) catch end
     end
 end
@@ -843,7 +843,7 @@ function Base.convert(::Type{Matrix{T}}, df::AbstractDataFrame) where T
     n, p = size(df)
     res = Matrix{T}(undef, n, p)
     idx = 1
-    for (name, col) in zip(names(df), columns(df))
+    for (name, col) in eachcol(df, true)
         try
             copyto!(res, idx, col)
         catch err

diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl
@@ -99,13 +99,12 @@ struct DataFrameColumns{T<:AbstractDataFrame, V} <: AbstractVector{V}
 end
 
 """
-    eachcol(df::AbstractDataFrame, names::Bool=true)
+    eachcol(df::AbstractDataFrame, names::Bool=false)
 
 Return a `DataFrameColumns` that iterates an `AbstractDataFrame` column by column.
-If `names` is equal to `true` (currently the default, in the future the default
-will be set to `false`) iteration returns a pair consisting of column name
-and column vector.
-If `names` is equal to `false` then column vectors are yielded.
+If `names` is equal to `false` (the default), iteration returns column vectors.
+If `names` is equal to `true`, pairs consisting of column name and column vector
+are yielded.
 
 **Examples**
 
@@ -120,48 +119,37 @@ julia> df = DataFrame(x=1:4, y=11:14)
 │ 3   │ 3     │ 13    │
 │ 4   │ 4     │ 14    │
 
-julia> collect(eachcol(df, true))
-2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
- :x => [1, 2, 3, 4]
- :y => [11, 12, 13, 14]
-
-julia> collect(eachcol(df, false))
+julia> collect(eachcol(df))
 2-element Array{AbstractArray{T,1} where T,1}:
  [1, 2, 3, 4]
  [11, 12, 13, 14]
 
-julia> sum.(eachcol(df, false))
-2-element Array{Int64,1}:
- 10
- 50
-
-julia> map(eachcol(df, false)) do col
+julia> map(eachcol(df)) do col
            maximum(col) - minimum(col)
        end
 2-element Array{Int64,1}:
  3
  3
+
+julia> sum.(eachcol(df))
+2-element Array{Int64,1}:
+ 10
+ 50
+
+julia> collect(eachcol(df, true))
+2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
+ :x => [1, 2, 3, 4]
+ :y => [11, 12, 13, 14]
 ```
 """
-@inline function eachcol(df::T, names::Bool) where T<: AbstractDataFrame
+@inline function eachcol(df::T, names::Bool=false) where T<: AbstractDataFrame
     if names
         DataFrameColumns{T, Pair{Symbol, AbstractVector}}(df)
     else
         DataFrameColumns{T, AbstractVector}(df)
     end
 end
 
-# TODO: remove this method after deprecation
-# and add default argument value above
-function eachcol(df::AbstractDataFrame)
-    Base.depwarn("In the future eachcol will have names argument set to false by default", :eachcol)
-    eachcol(df, true)
-end
-
-# TODO: remove this method after deprecation
-# this is left to make sure we do not forget to properly fix columns calls
-columns(df::AbstractDataFrame) = eachcol(df, false)
-
 Base.size(itr::DataFrameColumns) = (size(itr.df, 2),)
 Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear()
 

diff --git a/src/abstractdataframe/join.jl b/src/abstractdataframe/join.jl
@@ -89,13 +89,13 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol,
     cols = Vector{AbstractVector}(undef, ncleft + ncol(dfr_noon))
     # inner and left joins preserve non-missingness of the left frame
     _similar_left = kind == :inner || kind == :left ? similar : similar_missing
-    for (i, col) in enumerate(columns(joiner.dfl))
+    for (i, col) in enumerate(eachcol(joiner.dfl))
         cols[i] = _similar_left(col, nrow)
         copyto!(cols[i], view(col, all_orig_left_ixs))
     end
     # inner and right joins preserve non-missingness of the right frame
     _similar_right = kind == :inner || kind == :right ? similar : similar_missing
-    for (i, col) in enumerate(columns(dfr_noon))
+    for (i, col) in enumerate(eachcol(dfr_noon))
         cols[i+ncleft] = _similar_right(col, nrow)
         copyto!(cols[i+ncleft], view(col, all_orig_right_ixs))
         permute!(cols[i+ncleft], right_perm)
@@ -407,7 +407,7 @@ end
 function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame; makeunique::Bool=false)
     r1, r2 = size(df1, 1), size(df2, 1)
     colindex = merge(index(df1), index(df2), makeunique=makeunique)
-    cols = Any[[repeat(c, inner=r2) for c in columns(df1)];
-               [repeat(c, outer=r1) for c in columns(df2)]]
+    cols = Any[[repeat(c, inner=r2) for c in eachcol(df1)];
+               [repeat(c, outer=r1) for c in eachcol(df2)]]
     DataFrame(cols, colindex)
 end
diff --git a/src/deprecated.jl b/src/deprecated.jl
@@ -10,7 +10,7 @@ import Base: @deprecate
 
 import Base: keys, values, insert!
 @deprecate keys(df::AbstractDataFrame) names(df)
-@deprecate values(df::AbstractDataFrame) columns(df)
+@deprecate values(df::AbstractDataFrame) eachcol(df)
 @deprecate insert!(df::DataFrame, df2::AbstractDataFrame) (foreach(col -> df[col] = df2[col], names(df2)); df)
 
 @deprecate pool categorical
@@ -1325,11 +1325,11 @@ import Base: vcat
 @deprecate showcols(df::AbstractDataFrame, all::Bool=false, values::Bool=true) describe(df, :eltype, :nmissing, :first, :last)
 @deprecate showcols(io::IO, df::AbstractDataFrame, all::Bool=false, values::Bool=true) show(io, describe(df, :eltype, :nmissing, :first, :last), all)
 function StatsBase.describe(df::AbstractDataFrame; stats=nothing)
-    if stats === nothing 
-        _describe(df, [:mean, :min, :median, 
-                       :max, :nunique, :nmissing, 
+    if stats === nothing
+        _describe(df, [:mean, :min, :median,
+                       :max, :nunique, :nmissing,
                        :eltype])
-    elseif stats === :all 
+    elseif stats === :all
         Base.depwarn("The `stats` keyword argument has been deprecated. Use describe(df, stats...) instead.", :describe)
         _describe(df, [:mean, :std, :min, :q25, :median, :q75,
                        :max, :nunique, :nmissing, :first, :last, :eltype])

diff --git a/src/groupeddataframe/grouping.jl b/src/groupeddataframe/grouping.jl
@@ -683,7 +683,7 @@ function _combine(f::Union{AbstractVector{<:Pair}, Tuple{Vararg{Pair}},
                 incols = gd.parent[first(p)]
             else
                 df = gd.parent[collect(first(p))]
-                incols = NamedTuple{Tuple(names(df))}(columns(df))
+                incols = NamedTuple{Tuple(names(df))}(eachcol(df))
             end
             firstres = do_call(fun, gd, incols, 1)
             idx, outcols, _ = _combine_with_first(wrap(firstres), fun, gd, incols)
@@ -715,7 +715,7 @@ function _combine(f::Any, gd::GroupedDataFrame)
         fun = last(f)
     elseif f isa Pair
         df = gd.parent[collect(first(f))]
-        incols = NamedTuple{Tuple(names(df))}(columns(df))
+        incols = NamedTuple{Tuple(names(df))}(eachcol(df))
         fun = last(f)
     else
         incols = nothing

diff --git a/test/cat.jl b/test/cat.jl
@@ -1,7 +1,6 @@
 module TestCat
 
 using Test, Random, DataFrames
-using DataFrames: columns
 const ≅ = isequal
 
 #
@@ -158,7 +157,7 @@ end
 @testset "vcat >2 args" begin
     empty_dfs = [DataFrame(), DataFrame(), DataFrame()]
     @test vcat(empty_dfs...) == reduce(vcat, empty_dfs) == DataFrame()
-    
+
     df = DataFrame(x = trues(1), y = falses(1))
     dfs = [df, df, df]
     @test vcat(dfs...) ==reduce(vcat, dfs) == DataFrame(x = trues(3), y = falses(3))
@@ -167,13 +166,13 @@ end
 @testset "vcat mixed coltypes" begin
     df = vcat(DataFrame([[1]], [:x]), DataFrame([[1.0]], [:x]))
     @test df == DataFrame([[1.0, 1.0]], [:x])
-    @test typeof.(columns(df)) == [Vector{Float64}]
+    @test typeof.(eachcol(df)) == [Vector{Float64}]
     df = vcat(DataFrame([[1]], [:x]), DataFrame([["1"]], [:x]))
     @test df == DataFrame([[1, "1"]], [:x])
-    @test typeof.(columns(df)) == [Vector{Any}]
+    @test typeof.(eachcol(df)) == [Vector{Any}]
     df = vcat(DataFrame([Union{Missing, Int}[1]], [:x]), DataFrame([[1]], [:x]))
     @test df == DataFrame([[1, 1]], [:x])
-    @test typeof.(columns(df)) == [Vector{Union{Missing, Int}}]
+    @test typeof.(eachcol(df)) == [Vector{Union{Missing, Int}}]
     df = vcat(DataFrame([CategoricalArray([1])], [:x]), DataFrame([[1]], [:x]))
     @test df == DataFrame([[1, 1]], [:x])
     @test df[:x] isa Vector{Int}
@@ -188,14 +187,14 @@ end
     df = vcat(DataFrame([Union{Int, Missing}[1]], [:x]),
               DataFrame([["1"]], [:x]))
     @test df == DataFrame([[1, "1"]], [:x])
-    @test typeof.(columns(df)) == [Vector{Any}]
+    @test typeof.(eachcol(df)) == [Vector{Any}]
     df = vcat(DataFrame([CategoricalArray([1])], [:x]),
               DataFrame([CategoricalArray(["1"])], [:x]))
     @test df == DataFrame([[1, "1"]], [:x])
     @test df[:x] isa CategoricalVector{Any}
     df = vcat(DataFrame([trues(1)], [:x]), DataFrame([[false]], [:x]))
     @test df == DataFrame([[true, false]], [:x])
-    @test typeof.(columns(df)) == [Vector{Bool}]
+    @test typeof.(eachcol(df)) == [Vector{Bool}]
 end
 
 @testset "vcat out of order" begin
@@ -213,7 +212,7 @@ end
     @test vcat(df2, df1, df2) == DataFrame([[2, 4, 6, 7, 8, 9, 2, 4, 6],
                                             [8, 10, 12, 4, 5, 6, 8, 10, 12],
                                             [14, 16, 18, 1, 2, 3, 14, 16, 18]] ,[:C, :B, :A])
-    
+
     @test size(vcat(df1, df1, df1, df2, df2, df2)) == (18, 3)
     df3 = df1[[1, 3, 2]]
     res = vcat(df1, df1, df1, df2, df2, df2, df3, df3, df3, df3)

diff --git a/test/constructors.jl b/test/constructors.jl
@@ -2,7 +2,6 @@ module TestConstructors
 
 using Test, DataFrames
 using DataFrames: Index, _columns, index
-using DataFrames: columns
 const ≅ = isequal
 
 #
@@ -147,13 +146,13 @@ end
 @testset "column types" begin
     df = DataFrame(A = 1:3, B = 2:4, C = 3:5)
     answer = [Array{Int,1}, Array{Int,1}, Array{Int,1}]
-    @test map(typeof, columns(df)) == answer
+    @test map(typeof, eachcol(df)) == answer
     df[:D] = [4, 5, missing]
     push!(answer, Vector{Union{Int, Missing}})
-    @test map(typeof, columns(df)) == answer
+    @test map(typeof, eachcol(df)) == answer
     df[:E] = 'c'
     push!(answer, Vector{Char})
-    @test map(typeof, columns(df)) == answer
+    @test map(typeof, eachcol(df)) == answer
 end
 
 @testset "categorical constructor" begin