deprecate names=true in eachcol

JuliaData · Feb 26, 2020 · 4276bfe · 4276bfe
1 parent 3f94ee4
commit 4276bfe
Show file tree

Hide file tree

Showing 15 changed files with 214 additions and 194 deletions.
diff --git a/docs/src/lib/types.md b/docs/src/lib/types.md
@@ -40,14 +40,6 @@ serves as an iterator over rows of an `AbstractDataFrame`, returning `DataFrameR
 
 Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type, which
 serves as an iterator over columns of an `AbstractDataFrame`.
-The return value can have two concrete types:
-
-* If the `eachcol` function is called with the `names` argument set to `true` then it returns a value of the
-  `DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}` type, which is an
-  iterator returning a pair containing the column name and the column vector.
-* If the `eachcol` function is called with `names` argument set to `false` (the default) then it returns a value of the
-  `DataFrameColumns{<:AbstractDataFrame, AbstractVector}` type, which is an
-  iterator returning the column vector only.
 
 The `DataFrameRows` and `DataFrameColumns` types are subtypes of `AbstractVector` and support its interface
 with the exception that they are read only. Note that they are not exported and should not be constructed directly,

diff --git a/docs/src/man/getting_started.md b/docs/src/man/getting_started.md
@@ -775,7 +775,7 @@ true
 julia> df[1] !== x
 true
 
-julia> eachcol(df, false)[1] === df.x
+julia> eachcol(df)[1] === df.x
 true
 ```
 

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -1011,7 +1011,7 @@ function Base.convert(::Type{Matrix{T}}, df::AbstractDataFrame) where T
     n, p = size(df)
     res = Matrix{T}(undef, n, p)
     idx = 1
-    for (name, col) in eachcol(df, true)
+    for (name, col) in pairs(eachcol(df))
         try
             copyto!(res, idx, col)
         catch err

diff --git a/src/abstractdataframe/io.jl b/src/abstractdataframe/io.jl
@@ -162,11 +162,11 @@ function Base.show(io::IO, mime::MIME"text/html", dfrs::DataFrameRows; summary::
     _show(io, mime, df, summary=false)
 end
 
-function Base.show(io::IO, mime::MIME"text/html", dfcs::DataFrameColumns{T,V};
-                   summary::Bool=true) where {T,V}
+function Base.show(io::IO, mime::MIME"text/html", dfcs::DataFrameColumns;
+                   summary::Bool=true)
     df = parent(dfcs)
     if summary
-        write(io, "<p>$(nrow(df))×$(ncol(df)) DataFrameColumns (with names=$(V <: Pair))</p>")
+        write(io, "<p>$(nrow(df))×$(ncol(df)) DataFrameColumns</p>")
     end
     _show(io, mime, df, summary=false)
 end

diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl
@@ -90,30 +90,27 @@ Base.getproperty(itr::DataFrameRows, col_ind::Symbol) = getproperty(parent(itr),
 Base.propertynames(itr::DataFrameRows, private::Bool=false) = propertynames(parent(itr))
 
 # Iteration by columns
+
 """
-    DataFrameColumns{<:AbstractDataFrame, V} <: AbstractVector{V}
+    DataFrameColumns{<:AbstractDataFrame} <: AbstractVector{AbstractVector}
 
-Iterator over columns of an `AbstractDataFrame` constructed using
-[`eachcol(df, true)`](@ref) if `V` is a `Pair{Symbol,AbstractVector}`. Then each
-returned value is a pair consisting of column name and column vector.
-If `V` is an `AbstractVector` (a value returned by [`eachcol(df, false)`](@ref))
-then each returned value is a column vector.
+An `AbstractVector` that allows iteration over columns of an `AbstractDataFrame`.
+Indexing into `DataFrameColumns` objects using integer or symbol indices
+returns the corresponding column (without copying).
 """
-struct DataFrameColumns{T<:AbstractDataFrame, V} <: AbstractVector{V}
+struct DataFrameColumns{T<:AbstractDataFrame} <: AbstractVector{AbstractVector}
     df::T
 end
 
-Base.summary(dfcs::DataFrameColumns{T, V}) where {T,V} =
-    "$(length(dfcs))-element DataFrameColumns (with names=$(V <: Pair))"
+Base.summary(dfcs::DataFrameColumns)= "$(length(dfcs))-element DataFrameColumns"
 Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs))
 
 """
-    eachcol(df::AbstractDataFrame, names::Bool=false)
+    eachcol(df::AbstractDataFrame)
 
-Return a `DataFrameColumns` that iterates an `AbstractDataFrame` column by column.
-If `names` is equal to `false` (the default) iteration returns column vectors.
-If `names` is equal to `true` pairs consisting of column name and column vector
-are yielded.
+Return a `DataFrameColumns` that is an `AbstractVector`
+that allows iterating an `AbstractDataFrame` column by column.
+Additionally it is allowed to index `DataFrameColumns` using column names.
 
 # Examples
 ```jldoctest
@@ -143,39 +140,40 @@ julia> sum.(eachcol(df))
 2-element Array{Int64,1}:
  10
  50
-
-julia> collect(eachcol(df, true))
-2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
- :x => [1, 2, 3, 4]
- :y => [11, 12, 13, 14]
 ```
 """
-@inline function eachcol(df::T, names::Bool=false) where T<: AbstractDataFrame
-    if names
-        DataFrameColumns{T, Pair{Symbol, AbstractVector}}(df)
-    else
-        DataFrameColumns{T, AbstractVector}(df)
-    end
-end
+eachcol(df::AbstractDataFrame) = DataFrameColumns(df)
 
 Base.size(itr::DataFrameColumns) = (size(parent(itr), 2),)
 Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear()
 
-@inline function Base.getindex(itr::DataFrameColumns{<:AbstractDataFrame,
-                                                     Pair{Symbol, AbstractVector}}, j::Int)
-    @boundscheck checkbounds(itr, j)
-    @inbounds _names(parent(itr))[j] => parent(itr)[!, j]
-end
-
-@inline function Base.getindex(itr::DataFrameColumns{<:AbstractDataFrame, AbstractVector}, j::Int)
+@inline function Base.getindex(itr::DataFrameColumns, j::Int)
     @boundscheck checkbounds(itr, j)
     @inbounds parent(itr)[!, j]
 end
 
+Base.getindex(itr::DataFrameColumns, j::Symbol) = parent(itr)[!, j]
+
 Base.getproperty(itr::DataFrameColumns, col_ind::Symbol) = getproperty(parent(itr), col_ind)
 # Private fields are never exposed since they can conflict with column names
 Base.propertynames(itr::DataFrameColumns, private::Bool=false) = propertynames(parent(itr))
 
+"""
+    keys(dfc::DataFrameColumns)
+
+Get a vector of column names of `dfc`.
+"""
+Base.keys(itr::DataFrameColumns) = names(parent(itr))
+
+"""
+    pairs(dfc::DataFrameColumns)
+
+Return an iterator of pairs associating the name of each column of `dfc`
+with the corresponding column vector, i.e. `name => col`
+where `name` is the column name of the column `col`.
+"""
+Base.pairs(itr::DataFrameColumns) = Base.Iterators.Pairs(itr, keys(itr))
+
 """
     mapcols(f::Union{Function,Type}, df::AbstractDataFrame)
 
@@ -265,24 +263,24 @@ Base.show(dfrs::DataFrameRows;
     show(stdout, dfrs, allrows=allrows, allcols=allcols, splitcols=splitcols,
          rowlabel=rowlabel, summary=summary)
 
-function Base.show(io::IO, dfcs::DataFrameColumns{T,V};
+function Base.show(io::IO, dfcs::DataFrameColumns;
                    allrows::Bool = !get(io, :limit, false),
                    allcols::Bool = !get(io, :limit, false),
                    splitcols = get(io, :limit, false),
                    rowlabel::Symbol = :Row,
-                   summary::Bool = true) where {T, V}
+                   summary::Bool = true)
     df = parent(dfcs)
-    summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameColumns (with names=$(V <: Pair))")
+    summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameColumns")
     _show(io, parent(dfcs), allrows=allrows, allcols=allcols, splitcols=splitcols,
           rowlabel=rowlabel, summary=false)
 end
 
-Base.show(io::IO, mime::MIME"text/plain", dfcs::DataFrameColumns{T,V};
+Base.show(io::IO, mime::MIME"text/plain", dfcs::DataFrameColumns;
           allrows::Bool = !get(io, :limit, false),
           allcols::Bool = !get(io, :limit, false),
           splitcols = get(io, :limit, false),
           rowlabel::Symbol = :Row,
-          summary::Bool = true) where {T, V} =
+          summary::Bool = true) =
     show(io, dfcs, allrows=allrows, allcols=allcols, splitcols=splitcols,
          rowlabel=rowlabel, summary=summary)
 

diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl
@@ -128,7 +128,7 @@ function getmaxwidths(df::AbstractDataFrame,
     undefstrwidth = ourstrwidth(io, Base.undef_ref_str)
 
     j = 1
-    for (name, col) in eachcol(df, true)
+    for (name, col) in pairs(eachcol(df))
         # (1) Consider length of column name
         maxwidth = ourstrwidth(io, name)
 

diff --git a/src/deprecated.jl b/src/deprecated.jl
@@ -61,7 +61,6 @@ import Base: delete!, insert!, merge!
 
 import Base: map
 @deprecate map(f::Function, sdf::SubDataFrame) f(sdf)
-@deprecate map(f::Union{Function,Type}, dfc::DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}) mapcols(f, dfc.df)
 
 @deprecate head(df::AbstractDataFrame) first(df, 6)
 @deprecate tail(df::AbstractDataFrame) last(df, 6)
@@ -347,3 +346,5 @@ function Base.join(df1::AbstractDataFrame, df2::AbstractDataFrame,
                             "joining more than two data frames"))
     end
 end
+
+@deprecate eachcol(df::AbstractDataFrame, names::Bool) names ? collect(pairs(eachcol(df))) : eachcol(df)
diff --git a/test/dataframerow.jl b/test/dataframerow.jl
@@ -475,4 +475,15 @@ end
     @test eltype(Vector(dfr)) == Int
 end
 
+@testset "DataFrameRow" begin
+    df = DataFrame(A = Vector{Union{Int, Missing}}(1:2), B = Vector{Union{Int, Missing}}(2:3))
+    row = DataFrameRow(df, 1, :)
+
+    row[:A] = 100
+    @test df[1, :A] == 100
+
+    row[1] = 101
+    @test df[1, :A] == 101
+end
+
 end # module
diff --git a/test/deprecated.jl b/test/deprecated.jl
@@ -222,13 +222,13 @@ end
         @test df[r"[ab]"] == DataFrame(a=1:3, b=4:6)
         @test df[Not(Not(r"[ab]"))] == DataFrame(a=1:3, b=4:6)
         @test df[Not(3)] == DataFrame(a=1:3, b=4:6)
-        @test eachcol(df, false)[1] === df[1]
-        @test eachcol(view(df,1:2), false)[1] == eachcol(df, false)[1]
-        @test eachcol(df[1:2], false)[1] == eachcol(df, false)[1]
-        @test eachcol(df[r"[ab]"], false)[1] == eachcol(df, false)[1]
-        @test eachcol(df[Not(Not(r"[ab]"))], false)[1] == eachcol(df, false)[1]
-        @test eachcol(df[Not(r"[c]")], false)[1] == eachcol(df, false)[1]
-        @test eachcol(df[1:2], false)[1] !== eachcol(df, false)[1]
+        @test eachcol(df)[1] === df[1]
+        @test eachcol(view(df,1:2))[1] == eachcol(df)[1]
+        @test eachcol(df[1:2])[1] == eachcol(df)[1]
+        @test eachcol(df[r"[ab]"])[1] == eachcol(df)[1]
+        @test eachcol(df[Not(Not(r"[ab]"))])[1] == eachcol(df)[1]
+        @test eachcol(df[Not(r"[c]")])[1] == eachcol(df)[1]
+        @test eachcol(df[1:2])[1] !== eachcol(df)[1]
         @test df[:] == df
         @test df[r""] == df
         @test df[Not(Not(r""))] == df
@@ -237,13 +237,13 @@ end
         @test df[r""] !== df
         @test df[Not(Not(r""))] !== df
         @test df[Not(1:0)] !== df
-        @test eachcol(view(df, :), false)[1] == eachcol(df, false)[1]
-        @test eachcol(df[:], false)[1] == eachcol(df, false)[1]
-        @test eachcol(df[r""], false)[1] == eachcol(df, false)[1]
-        @test eachcol(df[Not(1:0)], false)[1] == eachcol(df, false)[1]
-        @test eachcol(df[:], false)[1] !== eachcol(df, false)[1]
-        @test eachcol(df[r""], false)[1] !== eachcol(df, false)[1]
-        @test eachcol(df[Not(1:0)], false)[1] !== eachcol(df, false)[1]
+        @test eachcol(view(df, :))[1] == eachcol(df)[1]
+        @test eachcol(df[:])[1] == eachcol(df)[1]
+        @test eachcol(df[r""])[1] == eachcol(df)[1]
+        @test eachcol(df[Not(1:0)])[1] == eachcol(df)[1]
+        @test eachcol(df[:])[1] !== eachcol(df)[1]
+        @test eachcol(df[r""])[1] !== eachcol(df)[1]
+        @test eachcol(df[Not(1:0)])[1] !== eachcol(df)[1]
     end
     @testset "getindex df[col] and df[cols]" begin
         x = [1, 2, 3]
@@ -539,6 +539,72 @@ end
     @test_throws ArgumentError join(df1, df2, df3, on=:id, kind=:xxx)
 end
 
+@testset "eachcol(df, true)" begin
+    df = DataFrame(a=1:3, b=4:6, c=7:9)
+    @test eachcol(df)[1] === last(eachcol(df, true)[1])
+    @test eachcol(df)[1] === last(eachcol(df, true)[1])
+
+    df = DataFrame(rand(3,4), [:a, :b, :c, :d])
+    df2 = DataFrame(eachcol(df, true))
+    @test df == df2
+    df2 = DataFrame!(eachcol(df, true))
+    @test df == df2
+    @test all(((a,b),) -> a === b, zip(eachcol(df), eachcol(df2)))
+
+    @test Tables.rowtable(df) == Tables.rowtable((;eachcol(df, true)...))
+    @test Tables.columntable(df) == Tables.columntable((;eachcol(df, true)...))
+
+    for (a, b, c, d) in zip(Tables.rowtable(df),
+                            Tables.namedtupleiterator(eachrow(df)),
+                            Tables.namedtupleiterator(eachcol(df)),
+                            Tables.namedtupleiterator((;eachcol(df, true)...)))
+        @test a isa NamedTuple
+        @test a === b === c === d
+    end
+
+    @test Tables.getcolumn((;eachcol(df, true)...), 1) == Tables.getcolumn(df, 1)
+    @test Tables.getcolumn((;eachcol(df, true)...), :a) == Tables.getcolumn(df, :a)
+    @test Tables.columnnames((;eachcol(df, true)...)) == Tuple(Tables.columnnames(df))
+
+    df = DataFrame(A = Vector{Union{Int, Missing}}(1:2), B = Vector{Union{Int, Missing}}(2:3))
+    @test size(eachcol(df, true)) == (size(df, 2),)
+    @test parent(DataFrame(eachcol(df, true))) == df
+    @test names(DataFrame(eachcol(df, true))) == names(df)
+    @test IndexStyle(eachcol(df, true)) == IndexLinear()
+    @test size(eachcol(df, false)) == (size(df, 2),)
+    @test IndexStyle(eachcol(df, false)) == IndexLinear()
+    @test length(eachcol(df, true)) == size(df, 2)
+    @test length(eachcol(df, false)) == size(df, 2)
+    @test eachcol(df, true)[1] == (:A => df[:, 1])
+    @test eachcol(df, false)[1] == df[:, 1]
+    @test collect(eachcol(df, true)) isa Vector{Pair{Symbol, AbstractVector}}
+    @test collect(eachcol(df, true)) == [:A => [1, 2], :B => [2, 3]]
+    @test collect(eachcol(df, false)) isa Vector{AbstractVector}
+    @test collect(eachcol(df, false)) == [[1, 2], [2, 3]]
+    @test eltype(eachcol(df, true)) == Pair{Symbol, AbstractVector}
+    @test eltype(eachcol(df, false)) == AbstractVector
+    for col in eachcol(df, true)
+        @test typeof(col) <: Pair{Symbol, <:AbstractVector}
+    end
+    for col in eachcol(df, false)
+        @test isa(col, AbstractVector)
+    end
+    @test map(minimum, eachcol(df, false)) == [1, 2]
+    @test eltype(map(Vector{Float64}, eachcol(df, false))) == Vector{Float64}
+
+    df = DataFrame([11:16 21:26 31:36 41:46])
+    sdf = view(df, [3,1,4], [3,1,4])
+    @test eachcol(sdf, true) == eachcol(df[[3,1,4], [3,1,4]], true)
+    @test eachcol(sdf, false) == eachcol(df[[3,1,4], [3,1,4]], false)
+    @test size(eachcol(sdf, true)) == (3,)
+    @test size(eachcol(sdf, false)) == (3,)
+
+    df_base = DataFrame([11:16 21:26 31:36 41:46])
+    for df in (df_base, view(df_base, 1:3, 1:3))
+        @test df == DataFrame(eachcol(df, true))
+    end
+end
+
 global_logger(old_logger)
 
 end # module
diff --git a/test/indexing.jl b/test/indexing.jl
@@ -19,9 +19,6 @@ using Test, DataFrames
         @test dfx[!, 1] === df[!, names(dfx)[1]]
     end
 
-    @test eachcol(df)[1] === last(eachcol(df, true)[1])
-    @test eachcol(df)[1] === last(eachcol(df, true)[1])
-
     @test df[1, 1] == 1
     @test df[1, 1:2] isa DataFrameRow
     @test df[1, r"[ab]"] isa DataFrameRow

diff --git a/test/io.jl b/test/io.jl
@@ -79,17 +79,7 @@ end
     io = IOBuffer()
     show(io, "text/html", eachcol(df))
     str = String(take!(io))
-    @test str == "<p>2×2 DataFrameColumns (with names=false)</p>" *
-                 "<table class=\"data-frame\"><thead><tr><th>" *
-                 "</th><th>Fish</th><th>Mass</th></tr>" *
-                 "<tr><th></th><th>String</th><th>Float64⍰</th></tr></thead><tbody>" *
-                 "<tr><th>1</th><td>#undef</td><td>1.5</td></tr>" *
-                 "<tr><th>2</th><td>#undef</td><td>missing</td></tr></tbody></table>"
-
-    io = IOBuffer()
-    show(io, "text/html", eachcol(df, true))
-    str = String(take!(io))
-    @test str == "<p>2×2 DataFrameColumns (with names=true)</p>" *
+    @test str == "<p>2×2 DataFrameColumns</p>" *
                  "<table class=\"data-frame\"><thead><tr><th>" *
                  "</th><th>Fish</th><th>Mass</th></tr>" *
                  "<tr><th></th><th>String</th><th>Float64⍰</th></tr></thead><tbody>" *
@@ -244,8 +234,7 @@ end
                    (DataFrames.index(df[1:1, 1:1]), "data frame with 1 column"),
                    (DataFrames.index(view(df, 1:1, 1:1)), "data frame with 1 column"),
                    (eachrow(df), "2-element DataFrameRows"),
-                   (eachcol(df), "3-element DataFrameColumns (with names=false)"),
-                   (eachcol(df, true), "3-element DataFrameColumns (with names=true)")]
+                   (eachcol(df), "3-element DataFrameColumns")]
         @test summary(v) == s
         io = IOBuffer()
         summary(io, v)