Skip to content

Commit

Permalink
deprecate names=true in eachcol
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Feb 26, 2020
1 parent 3f94ee4 commit 4276bfe
Show file tree
Hide file tree
Showing 15 changed files with 214 additions and 194 deletions.
8 changes: 0 additions & 8 deletions docs/src/lib/types.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,6 @@ serves as an iterator over rows of an `AbstractDataFrame`, returning `DataFrameR

Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type, which
serves as an iterator over columns of an `AbstractDataFrame`.
The return value can have two concrete types:

* If the `eachcol` function is called with the `names` argument set to `true` then it returns a value of the
`DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}` type, which is an
iterator returning a pair containing the column name and the column vector.
* If the `eachcol` function is called with `names` argument set to `false` (the default) then it returns a value of the
`DataFrameColumns{<:AbstractDataFrame, AbstractVector}` type, which is an
iterator returning the column vector only.

The `DataFrameRows` and `DataFrameColumns` types are subtypes of `AbstractVector` and support its interface
with the exception that they are read only. Note that they are not exported and should not be constructed directly,
Expand Down
2 changes: 1 addition & 1 deletion docs/src/man/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@ true
julia> df[1] !== x
true
julia> eachcol(df, false)[1] === df.x
julia> eachcol(df)[1] === df.x
true
```

Expand Down
2 changes: 1 addition & 1 deletion src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1011,7 +1011,7 @@ function Base.convert(::Type{Matrix{T}}, df::AbstractDataFrame) where T
n, p = size(df)
res = Matrix{T}(undef, n, p)
idx = 1
for (name, col) in eachcol(df, true)
for (name, col) in pairs(eachcol(df))
try
copyto!(res, idx, col)
catch err
Expand Down
6 changes: 3 additions & 3 deletions src/abstractdataframe/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,11 @@ function Base.show(io::IO, mime::MIME"text/html", dfrs::DataFrameRows; summary::
_show(io, mime, df, summary=false)
end

function Base.show(io::IO, mime::MIME"text/html", dfcs::DataFrameColumns{T,V};
summary::Bool=true) where {T,V}
function Base.show(io::IO, mime::MIME"text/html", dfcs::DataFrameColumns;
summary::Bool=true)
df = parent(dfcs)
if summary
write(io, "<p>$(nrow(df))×$(ncol(df)) DataFrameColumns (with names=$(V <: Pair))</p>")
write(io, "<p>$(nrow(df))×$(ncol(df)) DataFrameColumns</p>")
end
_show(io, mime, df, summary=false)
end
Expand Down
74 changes: 36 additions & 38 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,30 +90,27 @@ Base.getproperty(itr::DataFrameRows, col_ind::Symbol) = getproperty(parent(itr),
Base.propertynames(itr::DataFrameRows, private::Bool=false) = propertynames(parent(itr))

# Iteration by columns

"""
DataFrameColumns{<:AbstractDataFrame, V} <: AbstractVector{V}
DataFrameColumns{<:AbstractDataFrame} <: AbstractVector{AbstractVector}
Iterator over columns of an `AbstractDataFrame` constructed using
[`eachcol(df, true)`](@ref) if `V` is a `Pair{Symbol,AbstractVector}`. Then each
returned value is a pair consisting of column name and column vector.
If `V` is an `AbstractVector` (a value returned by [`eachcol(df, false)`](@ref))
then each returned value is a column vector.
An `AbstractVector` that allows iteration over columns of an `AbstractDataFrame`.
Indexing into `DataFrameColumns` objects using integer or symbol indices
returns the corresponding column (without copying).
"""
struct DataFrameColumns{T<:AbstractDataFrame, V} <: AbstractVector{V}
struct DataFrameColumns{T<:AbstractDataFrame} <: AbstractVector{AbstractVector}
df::T
end

Base.summary(dfcs::DataFrameColumns{T, V}) where {T,V} =
"$(length(dfcs))-element DataFrameColumns (with names=$(V <: Pair))"
Base.summary(dfcs::DataFrameColumns)= "$(length(dfcs))-element DataFrameColumns"
Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs))

"""
eachcol(df::AbstractDataFrame, names::Bool=false)
eachcol(df::AbstractDataFrame)
Return a `DataFrameColumns` that iterates an `AbstractDataFrame` column by column.
If `names` is equal to `false` (the default) iteration returns column vectors.
If `names` is equal to `true` pairs consisting of column name and column vector
are yielded.
Return a `DataFrameColumns` that is an `AbstractVector`
that allows iterating an `AbstractDataFrame` column by column.
Additionally it is allowed to index `DataFrameColumns` using column names.
# Examples
```jldoctest
Expand Down Expand Up @@ -143,39 +140,40 @@ julia> sum.(eachcol(df))
2-element Array{Int64,1}:
10
50
julia> collect(eachcol(df, true))
2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
:x => [1, 2, 3, 4]
:y => [11, 12, 13, 14]
```
"""
@inline function eachcol(df::T, names::Bool=false) where T<: AbstractDataFrame
if names
DataFrameColumns{T, Pair{Symbol, AbstractVector}}(df)
else
DataFrameColumns{T, AbstractVector}(df)
end
end
eachcol(df::AbstractDataFrame) = DataFrameColumns(df)

Base.size(itr::DataFrameColumns) = (size(parent(itr), 2),)
Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear()

@inline function Base.getindex(itr::DataFrameColumns{<:AbstractDataFrame,
Pair{Symbol, AbstractVector}}, j::Int)
@boundscheck checkbounds(itr, j)
@inbounds _names(parent(itr))[j] => parent(itr)[!, j]
end

@inline function Base.getindex(itr::DataFrameColumns{<:AbstractDataFrame, AbstractVector}, j::Int)
@inline function Base.getindex(itr::DataFrameColumns, j::Int)
@boundscheck checkbounds(itr, j)
@inbounds parent(itr)[!, j]
end

Base.getindex(itr::DataFrameColumns, j::Symbol) = parent(itr)[!, j]

Base.getproperty(itr::DataFrameColumns, col_ind::Symbol) = getproperty(parent(itr), col_ind)
# Private fields are never exposed since they can conflict with column names
Base.propertynames(itr::DataFrameColumns, private::Bool=false) = propertynames(parent(itr))

"""
keys(dfc::DataFrameColumns)
Get a vector of column names of `dfc`.
"""
Base.keys(itr::DataFrameColumns) = names(parent(itr))

"""
pairs(dfc::DataFrameColumns)
Return an iterator of pairs associating the name of each column of `dfc`
with the corresponding column vector, i.e. `name => col`
where `name` is the column name of the column `col`.
"""
Base.pairs(itr::DataFrameColumns) = Base.Iterators.Pairs(itr, keys(itr))

"""
mapcols(f::Union{Function,Type}, df::AbstractDataFrame)
Expand Down Expand Up @@ -265,24 +263,24 @@ Base.show(dfrs::DataFrameRows;
show(stdout, dfrs, allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=summary)

function Base.show(io::IO, dfcs::DataFrameColumns{T,V};
function Base.show(io::IO, dfcs::DataFrameColumns;
allrows::Bool = !get(io, :limit, false),
allcols::Bool = !get(io, :limit, false),
splitcols = get(io, :limit, false),
rowlabel::Symbol = :Row,
summary::Bool = true) where {T, V}
summary::Bool = true)
df = parent(dfcs)
summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameColumns (with names=$(V <: Pair))")
summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameColumns")
_show(io, parent(dfcs), allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=false)
end

Base.show(io::IO, mime::MIME"text/plain", dfcs::DataFrameColumns{T,V};
Base.show(io::IO, mime::MIME"text/plain", dfcs::DataFrameColumns;
allrows::Bool = !get(io, :limit, false),
allcols::Bool = !get(io, :limit, false),
splitcols = get(io, :limit, false),
rowlabel::Symbol = :Row,
summary::Bool = true) where {T, V} =
summary::Bool = true) =
show(io, dfcs, allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=summary)

Expand Down
2 changes: 1 addition & 1 deletion src/abstractdataframe/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ function getmaxwidths(df::AbstractDataFrame,
undefstrwidth = ourstrwidth(io, Base.undef_ref_str)

j = 1
for (name, col) in eachcol(df, true)
for (name, col) in pairs(eachcol(df))
# (1) Consider length of column name
maxwidth = ourstrwidth(io, name)

Expand Down
3 changes: 2 additions & 1 deletion src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ import Base: delete!, insert!, merge!

import Base: map
@deprecate map(f::Function, sdf::SubDataFrame) f(sdf)
@deprecate map(f::Union{Function,Type}, dfc::DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}) mapcols(f, dfc.df)

@deprecate head(df::AbstractDataFrame) first(df, 6)
@deprecate tail(df::AbstractDataFrame) last(df, 6)
Expand Down Expand Up @@ -347,3 +346,5 @@ function Base.join(df1::AbstractDataFrame, df2::AbstractDataFrame,
"joining more than two data frames"))
end
end

@deprecate eachcol(df::AbstractDataFrame, names::Bool) names ? collect(pairs(eachcol(df))) : eachcol(df)
11 changes: 11 additions & 0 deletions test/dataframerow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -475,4 +475,15 @@ end
@test eltype(Vector(dfr)) == Int
end

@testset "DataFrameRow" begin
df = DataFrame(A = Vector{Union{Int, Missing}}(1:2), B = Vector{Union{Int, Missing}}(2:3))
row = DataFrameRow(df, 1, :)

row[:A] = 100
@test df[1, :A] == 100

row[1] = 101
@test df[1, :A] == 101
end

end # module
94 changes: 80 additions & 14 deletions test/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,13 @@ end
@test df[r"[ab]"] == DataFrame(a=1:3, b=4:6)
@test df[Not(Not(r"[ab]"))] == DataFrame(a=1:3, b=4:6)
@test df[Not(3)] == DataFrame(a=1:3, b=4:6)
@test eachcol(df, false)[1] === df[1]
@test eachcol(view(df,1:2), false)[1] == eachcol(df, false)[1]
@test eachcol(df[1:2], false)[1] == eachcol(df, false)[1]
@test eachcol(df[r"[ab]"], false)[1] == eachcol(df, false)[1]
@test eachcol(df[Not(Not(r"[ab]"))], false)[1] == eachcol(df, false)[1]
@test eachcol(df[Not(r"[c]")], false)[1] == eachcol(df, false)[1]
@test eachcol(df[1:2], false)[1] !== eachcol(df, false)[1]
@test eachcol(df)[1] === df[1]
@test eachcol(view(df,1:2))[1] == eachcol(df)[1]
@test eachcol(df[1:2])[1] == eachcol(df)[1]
@test eachcol(df[r"[ab]"])[1] == eachcol(df)[1]
@test eachcol(df[Not(Not(r"[ab]"))])[1] == eachcol(df)[1]
@test eachcol(df[Not(r"[c]")])[1] == eachcol(df)[1]
@test eachcol(df[1:2])[1] !== eachcol(df)[1]
@test df[:] == df
@test df[r""] == df
@test df[Not(Not(r""))] == df
Expand All @@ -237,13 +237,13 @@ end
@test df[r""] !== df
@test df[Not(Not(r""))] !== df
@test df[Not(1:0)] !== df
@test eachcol(view(df, :), false)[1] == eachcol(df, false)[1]
@test eachcol(df[:], false)[1] == eachcol(df, false)[1]
@test eachcol(df[r""], false)[1] == eachcol(df, false)[1]
@test eachcol(df[Not(1:0)], false)[1] == eachcol(df, false)[1]
@test eachcol(df[:], false)[1] !== eachcol(df, false)[1]
@test eachcol(df[r""], false)[1] !== eachcol(df, false)[1]
@test eachcol(df[Not(1:0)], false)[1] !== eachcol(df, false)[1]
@test eachcol(view(df, :))[1] == eachcol(df)[1]
@test eachcol(df[:])[1] == eachcol(df)[1]
@test eachcol(df[r""])[1] == eachcol(df)[1]
@test eachcol(df[Not(1:0)])[1] == eachcol(df)[1]
@test eachcol(df[:])[1] !== eachcol(df)[1]
@test eachcol(df[r""])[1] !== eachcol(df)[1]
@test eachcol(df[Not(1:0)])[1] !== eachcol(df)[1]
end
@testset "getindex df[col] and df[cols]" begin
x = [1, 2, 3]
Expand Down Expand Up @@ -539,6 +539,72 @@ end
@test_throws ArgumentError join(df1, df2, df3, on=:id, kind=:xxx)
end

@testset "eachcol(df, true)" begin
df = DataFrame(a=1:3, b=4:6, c=7:9)
@test eachcol(df)[1] === last(eachcol(df, true)[1])
@test eachcol(df)[1] === last(eachcol(df, true)[1])

df = DataFrame(rand(3,4), [:a, :b, :c, :d])
df2 = DataFrame(eachcol(df, true))
@test df == df2
df2 = DataFrame!(eachcol(df, true))
@test df == df2
@test all(((a,b),) -> a === b, zip(eachcol(df), eachcol(df2)))

@test Tables.rowtable(df) == Tables.rowtable((;eachcol(df, true)...))
@test Tables.columntable(df) == Tables.columntable((;eachcol(df, true)...))

for (a, b, c, d) in zip(Tables.rowtable(df),
Tables.namedtupleiterator(eachrow(df)),
Tables.namedtupleiterator(eachcol(df)),
Tables.namedtupleiterator((;eachcol(df, true)...)))
@test a isa NamedTuple
@test a === b === c === d
end

@test Tables.getcolumn((;eachcol(df, true)...), 1) == Tables.getcolumn(df, 1)
@test Tables.getcolumn((;eachcol(df, true)...), :a) == Tables.getcolumn(df, :a)
@test Tables.columnnames((;eachcol(df, true)...)) == Tuple(Tables.columnnames(df))

df = DataFrame(A = Vector{Union{Int, Missing}}(1:2), B = Vector{Union{Int, Missing}}(2:3))
@test size(eachcol(df, true)) == (size(df, 2),)
@test parent(DataFrame(eachcol(df, true))) == df
@test names(DataFrame(eachcol(df, true))) == names(df)
@test IndexStyle(eachcol(df, true)) == IndexLinear()
@test size(eachcol(df, false)) == (size(df, 2),)
@test IndexStyle(eachcol(df, false)) == IndexLinear()
@test length(eachcol(df, true)) == size(df, 2)
@test length(eachcol(df, false)) == size(df, 2)
@test eachcol(df, true)[1] == (:A => df[:, 1])
@test eachcol(df, false)[1] == df[:, 1]
@test collect(eachcol(df, true)) isa Vector{Pair{Symbol, AbstractVector}}
@test collect(eachcol(df, true)) == [:A => [1, 2], :B => [2, 3]]
@test collect(eachcol(df, false)) isa Vector{AbstractVector}
@test collect(eachcol(df, false)) == [[1, 2], [2, 3]]
@test eltype(eachcol(df, true)) == Pair{Symbol, AbstractVector}
@test eltype(eachcol(df, false)) == AbstractVector
for col in eachcol(df, true)
@test typeof(col) <: Pair{Symbol, <:AbstractVector}
end
for col in eachcol(df, false)
@test isa(col, AbstractVector)
end
@test map(minimum, eachcol(df, false)) == [1, 2]
@test eltype(map(Vector{Float64}, eachcol(df, false))) == Vector{Float64}

df = DataFrame([11:16 21:26 31:36 41:46])
sdf = view(df, [3,1,4], [3,1,4])
@test eachcol(sdf, true) == eachcol(df[[3,1,4], [3,1,4]], true)
@test eachcol(sdf, false) == eachcol(df[[3,1,4], [3,1,4]], false)
@test size(eachcol(sdf, true)) == (3,)
@test size(eachcol(sdf, false)) == (3,)

df_base = DataFrame([11:16 21:26 31:36 41:46])
for df in (df_base, view(df_base, 1:3, 1:3))
@test df == DataFrame(eachcol(df, true))
end
end

global_logger(old_logger)

end # module
3 changes: 0 additions & 3 deletions test/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ using Test, DataFrames
@test dfx[!, 1] === df[!, names(dfx)[1]]
end

@test eachcol(df)[1] === last(eachcol(df, true)[1])
@test eachcol(df)[1] === last(eachcol(df, true)[1])

@test df[1, 1] == 1
@test df[1, 1:2] isa DataFrameRow
@test df[1, r"[ab]"] isa DataFrameRow
Expand Down
15 changes: 2 additions & 13 deletions test/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,7 @@ end
io = IOBuffer()
show(io, "text/html", eachcol(df))
str = String(take!(io))
@test str == "<p>2×2 DataFrameColumns (with names=false)</p>" *
"<table class=\"data-frame\"><thead><tr><th>" *
"</th><th>Fish</th><th>Mass</th></tr>" *
"<tr><th></th><th>String</th><th>Float64⍰</th></tr></thead><tbody>" *
"<tr><th>1</th><td>#undef</td><td>1.5</td></tr>" *
"<tr><th>2</th><td>#undef</td><td>missing</td></tr></tbody></table>"

io = IOBuffer()
show(io, "text/html", eachcol(df, true))
str = String(take!(io))
@test str == "<p>2×2 DataFrameColumns (with names=true)</p>" *
@test str == "<p>2×2 DataFrameColumns</p>" *
"<table class=\"data-frame\"><thead><tr><th>" *
"</th><th>Fish</th><th>Mass</th></tr>" *
"<tr><th></th><th>String</th><th>Float64⍰</th></tr></thead><tbody>" *
Expand Down Expand Up @@ -244,8 +234,7 @@ end
(DataFrames.index(df[1:1, 1:1]), "data frame with 1 column"),
(DataFrames.index(view(df, 1:1, 1:1)), "data frame with 1 column"),
(eachrow(df), "2-element DataFrameRows"),
(eachcol(df), "3-element DataFrameColumns (with names=false)"),
(eachcol(df, true), "3-element DataFrameColumns (with names=true)")]
(eachcol(df), "3-element DataFrameColumns")]
@test summary(v) == s
io = IOBuffer()
summary(io, v)
Expand Down
Loading

0 comments on commit 4276bfe

Please sign in to comment.