Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BREAKING] deprecate names=true in eachcol #2120

Merged
merged 5 commits into from
Feb 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions docs/src/lib/types.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,6 @@ serves as an iterator over rows of an `AbstractDataFrame`, returning `DataFrameR

Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type, which
serves as an iterator over columns of an `AbstractDataFrame`.
The return value can have two concrete types:

* If the `eachcol` function is called with the `names` argument set to `true` then it returns a value of the
`DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}` type, which is an
iterator returning a pair containing the column name and the column vector.
* If the `eachcol` function is called with `names` argument set to `false` (the default) then it returns a value of the
`DataFrameColumns{<:AbstractDataFrame, AbstractVector}` type, which is an
iterator returning the column vector only.

The `DataFrameRows` and `DataFrameColumns` types are subtypes of `AbstractVector` and support its interface
with the exception that they are read only. Note that they are not exported and should not be constructed directly,
Expand Down
2 changes: 1 addition & 1 deletion docs/src/man/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@ true
julia> df[1] !== x
true

julia> eachcol(df, false)[1] === df.x
julia> eachcol(df)[1] === df.x
true
```

Expand Down
2 changes: 1 addition & 1 deletion src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ function Base.convert(::Type{Matrix{T}}, df::AbstractDataFrame) where T
n, p = size(df)
res = Matrix{T}(undef, n, p)
idx = 1
for (name, col) in eachcol(df, true)
for (name, col) in pairs(eachcol(df))
try
copyto!(res, idx, col)
catch err
Expand Down
6 changes: 3 additions & 3 deletions src/abstractdataframe/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,11 @@ function Base.show(io::IO, mime::MIME"text/html", dfrs::DataFrameRows; summary::
_show(io, mime, df, summary=false)
end

function Base.show(io::IO, mime::MIME"text/html", dfcs::DataFrameColumns{T,V};
summary::Bool=true) where {T,V}
function Base.show(io::IO, mime::MIME"text/html", dfcs::DataFrameColumns;
summary::Bool=true)
df = parent(dfcs)
if summary
write(io, "<p>$(nrow(df))×$(ncol(df)) DataFrameColumns (with names=$(V <: Pair))</p>")
write(io, "<p>$(nrow(df))×$(ncol(df)) DataFrameColumns</p>")
end
_show(io, mime, df, summary=false)
end
Expand Down
74 changes: 36 additions & 38 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,30 +90,27 @@ Base.getproperty(itr::DataFrameRows, col_ind::Symbol) = getproperty(parent(itr),
Base.propertynames(itr::DataFrameRows, private::Bool=false) = propertynames(parent(itr))

# Iteration by columns

"""
DataFrameColumns{<:AbstractDataFrame, V} <: AbstractVector{V}
DataFrameColumns{<:AbstractDataFrame} <: AbstractVector{AbstractVector}

Iterator over columns of an `AbstractDataFrame` constructed using
[`eachcol(df, true)`](@ref) if `V` is a `Pair{Symbol,AbstractVector}`. Then each
returned value is a pair consisting of column name and column vector.
If `V` is an `AbstractVector` (a value returned by [`eachcol(df, false)`](@ref))
then each returned value is a column vector.
An `AbstractVector` that allows iteration over columns of an `AbstractDataFrame`.
Indexing into `DataFrameColumns` objects using integer or symbol indices
returns the corresponding column (without copying).
"""
struct DataFrameColumns{T<:AbstractDataFrame, V} <: AbstractVector{V}
struct DataFrameColumns{T<:AbstractDataFrame} <: AbstractVector{AbstractVector}
df::T
end

Base.summary(dfcs::DataFrameColumns{T, V}) where {T,V} =
"$(length(dfcs))-element DataFrameColumns (with names=$(V <: Pair))"
Base.summary(dfcs::DataFrameColumns)= "$(length(dfcs))-element DataFrameColumns"
Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs))

"""
eachcol(df::AbstractDataFrame, names::Bool=false)
eachcol(df::AbstractDataFrame)

Return a `DataFrameColumns` that iterates an `AbstractDataFrame` column by column.
If `names` is equal to `false` (the default) iteration returns column vectors.
If `names` is equal to `true` pairs consisting of column name and column vector
are yielded.
Return a `DataFrameColumns` that is an `AbstractVector`
that allows iterating an `AbstractDataFrame` column by column.
Additionally it is allowed to index `DataFrameColumns` using column names.

# Examples
```jldoctest
Expand Down Expand Up @@ -143,39 +140,40 @@ julia> sum.(eachcol(df))
2-element Array{Int64,1}:
10
50

julia> collect(eachcol(df, true))
2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
:x => [1, 2, 3, 4]
:y => [11, 12, 13, 14]
```
"""
@inline function eachcol(df::T, names::Bool=false) where T<: AbstractDataFrame
if names
DataFrameColumns{T, Pair{Symbol, AbstractVector}}(df)
else
DataFrameColumns{T, AbstractVector}(df)
end
end
eachcol(df::AbstractDataFrame) = DataFrameColumns(df)

Base.size(itr::DataFrameColumns) = (size(parent(itr), 2),)
Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear()

@inline function Base.getindex(itr::DataFrameColumns{<:AbstractDataFrame,
Pair{Symbol, AbstractVector}}, j::Int)
@boundscheck checkbounds(itr, j)
@inbounds _names(parent(itr))[j] => parent(itr)[!, j]
end

@inline function Base.getindex(itr::DataFrameColumns{<:AbstractDataFrame, AbstractVector}, j::Int)
@inline function Base.getindex(itr::DataFrameColumns, j::Int)
@boundscheck checkbounds(itr, j)
@inbounds parent(itr)[!, j]
end

Base.getindex(itr::DataFrameColumns, j::Symbol) = parent(itr)[!, j]

Base.getproperty(itr::DataFrameColumns, col_ind::Symbol) = getproperty(parent(itr), col_ind)
# Private fields are never exposed since they can conflict with column names
Base.propertynames(itr::DataFrameColumns, private::Bool=false) = propertynames(parent(itr))

"""
keys(dfc::DataFrameColumns)

Get a vector of column names of `dfc`.
"""
Base.keys(itr::DataFrameColumns) = names(parent(itr))

"""
pairs(dfc::DataFrameColumns)

Return an iterator of pairs associating the name of each column of `dfc`
with the corresponding column vector, i.e. `name => col`
where `name` is the column name of the column `col`.
"""
Base.pairs(itr::DataFrameColumns) = Base.Iterators.Pairs(itr, keys(itr))

"""
mapcols(f::Union{Function,Type}, df::AbstractDataFrame)

Expand Down Expand Up @@ -265,24 +263,24 @@ Base.show(dfrs::DataFrameRows;
show(stdout, dfrs, allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=summary)

function Base.show(io::IO, dfcs::DataFrameColumns{T,V};
function Base.show(io::IO, dfcs::DataFrameColumns;
allrows::Bool = !get(io, :limit, false),
allcols::Bool = !get(io, :limit, false),
splitcols = get(io, :limit, false),
rowlabel::Symbol = :Row,
summary::Bool = true) where {T, V}
summary::Bool = true)
df = parent(dfcs)
summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameColumns (with names=$(V <: Pair))")
summary && print(io, "$(nrow(df))×$(ncol(df)) DataFrameColumns")
_show(io, parent(dfcs), allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=false)
end

Base.show(io::IO, mime::MIME"text/plain", dfcs::DataFrameColumns{T,V};
Base.show(io::IO, mime::MIME"text/plain", dfcs::DataFrameColumns;
allrows::Bool = !get(io, :limit, false),
allcols::Bool = !get(io, :limit, false),
splitcols = get(io, :limit, false),
rowlabel::Symbol = :Row,
summary::Bool = true) where {T, V} =
summary::Bool = true) =
show(io, dfcs, allrows=allrows, allcols=allcols, splitcols=splitcols,
rowlabel=rowlabel, summary=summary)

Expand Down
2 changes: 1 addition & 1 deletion src/abstractdataframe/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ function getmaxwidths(df::AbstractDataFrame,
undefstrwidth = ourstrwidth(io, Base.undef_ref_str)

j = 1
for (name, col) in eachcol(df, true)
for (name, col) in pairs(eachcol(df))
# (1) Consider length of column name
maxwidth = ourstrwidth(io, name)

Expand Down
3 changes: 2 additions & 1 deletion src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ import Base: delete!, insert!, merge!

import Base: map
@deprecate map(f::Function, sdf::SubDataFrame) f(sdf)
@deprecate map(f::Union{Function,Type}, dfc::DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}) mapcols(f, dfc.df)

@deprecate head(df::AbstractDataFrame) first(df, 6)
@deprecate tail(df::AbstractDataFrame) last(df, 6)
Expand Down Expand Up @@ -347,3 +346,5 @@ function Base.join(df1::AbstractDataFrame, df2::AbstractDataFrame,
"joining more than two data frames"))
end
end

@deprecate eachcol(df::AbstractDataFrame, names::Bool) names ? collect(pairs(eachcol(df))) : eachcol(df)
11 changes: 11 additions & 0 deletions test/dataframerow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -475,4 +475,15 @@ end
@test eltype(Vector(dfr)) == Int
end

@testset "DataFrameRow" begin
df = DataFrame(A = Vector{Union{Int, Missing}}(1:2), B = Vector{Union{Int, Missing}}(2:3))
row = DataFrameRow(df, 1, :)

row[:A] = 100
@test df[1, :A] == 100

row[1] = 101
@test df[1, :A] == 101
end

end # module
94 changes: 80 additions & 14 deletions test/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,13 @@ end
@test df[r"[ab]"] == DataFrame(a=1:3, b=4:6)
@test df[Not(Not(r"[ab]"))] == DataFrame(a=1:3, b=4:6)
@test df[Not(3)] == DataFrame(a=1:3, b=4:6)
@test eachcol(df, false)[1] === df[1]
@test eachcol(view(df,1:2), false)[1] == eachcol(df, false)[1]
@test eachcol(df[1:2], false)[1] == eachcol(df, false)[1]
@test eachcol(df[r"[ab]"], false)[1] == eachcol(df, false)[1]
@test eachcol(df[Not(Not(r"[ab]"))], false)[1] == eachcol(df, false)[1]
@test eachcol(df[Not(r"[c]")], false)[1] == eachcol(df, false)[1]
@test eachcol(df[1:2], false)[1] !== eachcol(df, false)[1]
@test eachcol(df)[1] === df[1]
@test eachcol(view(df,1:2))[1] == eachcol(df)[1]
@test eachcol(df[1:2])[1] == eachcol(df)[1]
@test eachcol(df[r"[ab]"])[1] == eachcol(df)[1]
@test eachcol(df[Not(Not(r"[ab]"))])[1] == eachcol(df)[1]
@test eachcol(df[Not(r"[c]")])[1] == eachcol(df)[1]
@test eachcol(df[1:2])[1] !== eachcol(df)[1]
@test df[:] == df
@test df[r""] == df
@test df[Not(Not(r""))] == df
Expand All @@ -237,13 +237,13 @@ end
@test df[r""] !== df
@test df[Not(Not(r""))] !== df
@test df[Not(1:0)] !== df
@test eachcol(view(df, :), false)[1] == eachcol(df, false)[1]
@test eachcol(df[:], false)[1] == eachcol(df, false)[1]
@test eachcol(df[r""], false)[1] == eachcol(df, false)[1]
@test eachcol(df[Not(1:0)], false)[1] == eachcol(df, false)[1]
@test eachcol(df[:], false)[1] !== eachcol(df, false)[1]
@test eachcol(df[r""], false)[1] !== eachcol(df, false)[1]
@test eachcol(df[Not(1:0)], false)[1] !== eachcol(df, false)[1]
@test eachcol(view(df, :))[1] == eachcol(df)[1]
@test eachcol(df[:])[1] == eachcol(df)[1]
@test eachcol(df[r""])[1] == eachcol(df)[1]
@test eachcol(df[Not(1:0)])[1] == eachcol(df)[1]
@test eachcol(df[:])[1] !== eachcol(df)[1]
@test eachcol(df[r""])[1] !== eachcol(df)[1]
@test eachcol(df[Not(1:0)])[1] !== eachcol(df)[1]
end
@testset "getindex df[col] and df[cols]" begin
x = [1, 2, 3]
Expand Down Expand Up @@ -539,6 +539,72 @@ end
@test_throws ArgumentError join(df1, df2, df3, on=:id, kind=:xxx)
end

@testset "eachcol(df, true)" begin
df = DataFrame(a=1:3, b=4:6, c=7:9)
@test eachcol(df)[1] === last(eachcol(df, true)[1])
@test eachcol(df)[1] === last(eachcol(df, true)[1])

df = DataFrame(rand(3,4), [:a, :b, :c, :d])
df2 = DataFrame(eachcol(df, true))
@test df == df2
df2 = DataFrame!(eachcol(df, true))
@test df == df2
@test all(((a,b),) -> a === b, zip(eachcol(df), eachcol(df2)))

@test Tables.rowtable(df) == Tables.rowtable((;eachcol(df, true)...))
@test Tables.columntable(df) == Tables.columntable((;eachcol(df, true)...))

for (a, b, c, d) in zip(Tables.rowtable(df),
Tables.namedtupleiterator(eachrow(df)),
Tables.namedtupleiterator(eachcol(df)),
Tables.namedtupleiterator((;eachcol(df, true)...)))
@test a isa NamedTuple
@test a === b === c === d
end

@test Tables.getcolumn((;eachcol(df, true)...), 1) == Tables.getcolumn(df, 1)
@test Tables.getcolumn((;eachcol(df, true)...), :a) == Tables.getcolumn(df, :a)
@test Tables.columnnames((;eachcol(df, true)...)) == Tuple(Tables.columnnames(df))

df = DataFrame(A = Vector{Union{Int, Missing}}(1:2), B = Vector{Union{Int, Missing}}(2:3))
@test size(eachcol(df, true)) == (size(df, 2),)
@test parent(DataFrame(eachcol(df, true))) == df
@test names(DataFrame(eachcol(df, true))) == names(df)
@test IndexStyle(eachcol(df, true)) == IndexLinear()
@test size(eachcol(df, false)) == (size(df, 2),)
@test IndexStyle(eachcol(df, false)) == IndexLinear()
@test length(eachcol(df, true)) == size(df, 2)
@test length(eachcol(df, false)) == size(df, 2)
@test eachcol(df, true)[1] == (:A => df[:, 1])
@test eachcol(df, false)[1] == df[:, 1]
@test collect(eachcol(df, true)) isa Vector{Pair{Symbol, AbstractVector}}
@test collect(eachcol(df, true)) == [:A => [1, 2], :B => [2, 3]]
@test collect(eachcol(df, false)) isa Vector{AbstractVector}
@test collect(eachcol(df, false)) == [[1, 2], [2, 3]]
@test eltype(eachcol(df, true)) == Pair{Symbol, AbstractVector}
@test eltype(eachcol(df, false)) == AbstractVector
for col in eachcol(df, true)
@test typeof(col) <: Pair{Symbol, <:AbstractVector}
end
for col in eachcol(df, false)
@test isa(col, AbstractVector)
end
@test map(minimum, eachcol(df, false)) == [1, 2]
@test eltype(map(Vector{Float64}, eachcol(df, false))) == Vector{Float64}

df = DataFrame([11:16 21:26 31:36 41:46])
sdf = view(df, [3,1,4], [3,1,4])
@test eachcol(sdf, true) == eachcol(df[[3,1,4], [3,1,4]], true)
@test eachcol(sdf, false) == eachcol(df[[3,1,4], [3,1,4]], false)
@test size(eachcol(sdf, true)) == (3,)
@test size(eachcol(sdf, false)) == (3,)

df_base = DataFrame([11:16 21:26 31:36 41:46])
for df in (df_base, view(df_base, 1:3, 1:3))
@test df == DataFrame(eachcol(df, true))
end
end

global_logger(old_logger)

end # module
3 changes: 0 additions & 3 deletions test/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@ using Test, DataFrames
@test dfx[!, 1] === df[!, names(dfx)[1]]
end

@test eachcol(df)[1] === last(eachcol(df, true)[1])
@test eachcol(df)[1] === last(eachcol(df, true)[1])

@test df[1, 1] == 1
@test df[1, 1:2] isa DataFrameRow
@test df[1, r"[ab]"] isa DataFrameRow
Expand Down
15 changes: 2 additions & 13 deletions test/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,7 @@ end
io = IOBuffer()
show(io, "text/html", eachcol(df))
str = String(take!(io))
@test str == "<p>2×2 DataFrameColumns (with names=false)</p>" *
"<table class=\"data-frame\"><thead><tr><th>" *
"</th><th>Fish</th><th>Mass</th></tr>" *
"<tr><th></th><th>String</th><th>Float64⍰</th></tr></thead><tbody>" *
"<tr><th>1</th><td>#undef</td><td>1.5</td></tr>" *
"<tr><th>2</th><td>#undef</td><td>missing</td></tr></tbody></table>"

io = IOBuffer()
show(io, "text/html", eachcol(df, true))
str = String(take!(io))
@test str == "<p>2×2 DataFrameColumns (with names=true)</p>" *
@test str == "<p>2×2 DataFrameColumns</p>" *
"<table class=\"data-frame\"><thead><tr><th>" *
"</th><th>Fish</th><th>Mass</th></tr>" *
"<tr><th></th><th>String</th><th>Float64⍰</th></tr></thead><tbody>" *
Expand Down Expand Up @@ -244,8 +234,7 @@ end
(DataFrames.index(df[1:1, 1:1]), "data frame with 1 column"),
(DataFrames.index(view(df, 1:1, 1:1)), "data frame with 1 column"),
(eachrow(df), "2-element DataFrameRows"),
(eachcol(df), "3-element DataFrameColumns (with names=false)"),
(eachcol(df, true), "3-element DataFrameColumns (with names=true)")]
(eachcol(df), "3-element DataFrameColumns")]
@test summary(v) == s
io = IOBuffer()
summary(io, v)
Expand Down
Loading