Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

End deprecation period of eachcol #1752

Merged
merged 8 commits into from
Apr 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions docs/src/lib/types.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,10 @@ Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type
serves as an iterator over columns of an `AbstractDataFrame`.
The return value can have two concrete types:

* If the `eachcol` function is called with the `names` argument set to `true` (currently the default,
but in the future the default will change to `false`) then it returns a value of the
* If the `eachcol` function is called with the `names` argument set to `true` then it returns a value of the
`DataFrameColumns{<:AbstractDataFrame, Pair{Symbol, AbstractVector}}` type, which is an
iterator returning a pair containing the column name and the column vector.
* If the `eachcol` function is called with `names` argument set to `false` then it returns a value of the
* If the `eachcol` function is called with the `names` argument set to `false` (the default) then it returns a value of the
`DataFrameColumns{<:AbstractDataFrame, AbstractVector}` type, which is an
iterator returning the column vector only.

Expand Down
42 changes: 21 additions & 21 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ eltypes(df)
```

"""
eltypes(df::AbstractDataFrame) = eltype.(columns(df))
eltypes(df::AbstractDataFrame) = eltype.(eachcol(df))

Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df))
function Base.size(df::AbstractDataFrame, i::Integer)
Expand Down Expand Up @@ -244,7 +244,7 @@ that is different than the number of rows present in `df`.
"""
function Base.similar(df::AbstractDataFrame, rows::Integer = size(df, 1))
rows < 0 && throw(ArgumentError("the number of rows must be non-negative"))
DataFrame(AbstractVector[similar(x, rows) for x in columns(df)], copy(index(df)))
DataFrame(AbstractVector[similar(x, rows) for x in eachcol(df)], copy(index(df)))
end

##############################################################################
Expand Down Expand Up @@ -341,15 +341,15 @@ describe(df::AbstractDataFrame, stats::Union{Symbol, Pair{<:Symbol}}...)
**Arguments**

* `df` : the `AbstractDataFrame`
* `stats::Union{Symbol, Pair{<:Symbol}}...` : the summary statistics to report.
Arguments can be:
* A symbol from the list `:mean`, `:std`, `:min`, `:q25`,
`:median`, `:q75`, `:max`, `:eltype`, `:nunique`, `:first`, `:last`, and
* `stats::Union{Symbol, Pair{<:Symbol}}...` : the summary statistics to report.
Arguments can be:
* A symbol from the list `:mean`, `:std`, `:min`, `:q25`,
`:median`, `:q75`, `:max`, `:eltype`, `:nunique`, `:first`, `:last`, and
`:nmissing`. The default statistics used
are `:mean`, `:min`, `:median`, `:max`, `:nunique`, `:nmissing`, and `:eltype`.
* `:all` as the only `Symbol` argument to return all statistics.
* `:all` as the only `Symbol` argument to return all statistics.
* A `name => function` pair where `name` is a `Symbol`. This will create
a column of summary statistics with the provided name.
a column of summary statistics with the provided name.

**Result**

Expand Down Expand Up @@ -388,16 +388,16 @@ describe(df, :min, :sum => sum)
```

"""
StatsBase.describe(df::AbstractDataFrame, stats::Union{Symbol, Pair{Symbol}}...) =
StatsBase.describe(df::AbstractDataFrame, stats::Union{Symbol, Pair{Symbol}}...) =
_describe(df, collect(stats))

# TODO: un-comment this method definition after the deprecation period of
# the `stats` keyword for `describe`.
# StatsBase.describe(df::AbstractDataFrame) =
# the `stats` keyword for `describe`.
# StatsBase.describe(df::AbstractDataFrame) =
# _describe(df, [:mean, :min, :median, :max, :nunique, :nmissing, :eltype])

function _describe(df::AbstractDataFrame, stats::AbstractVector)
predefined_funs = Symbol[s for s in stats if s isa Symbol]
function _describe(df::AbstractDataFrame, stats::AbstractVector)
predefined_funs = Symbol[s for s in stats if s isa Symbol]

allowed_fields = [:mean, :std, :min, :q25, :median, :q75,
:max, :nunique, :nmissing, :first, :last, :eltype]
Expand All @@ -406,8 +406,8 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
predefined_funs = allowed_fields
i = findfirst(s -> s == :all, stats)
splice!(stats, i, allowed_fields) # insert in the stats vector to get a good order
elseif :all in predefined_funs
throw(ArgumentError("`:all` must be the only `Symbol` argument."))
elseif :all in predefined_funs
throw(ArgumentError("`:all` must be the only `Symbol` argument."))
elseif !issubset(predefined_funs, allowed_fields)
not_allowed = join(setdiff(predefined_funs, allowed_fields), ", :")
allowed_msg = "\nAllowed fields are: :" * join(allowed_fields, ", :")
Expand All @@ -417,7 +417,7 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
custom_funs = Pair[s for s in stats if s isa Pair]

ordered_names = [s isa Symbol ? s : s[1] for s in stats]

if !allunique(ordered_names)
duplicate_names = unique(ordered_names[nonunique(DataFrame(ordered_names = ordered_names))])
throw(ArgumentError("Duplicate names not allowed. Duplicated value(s) are: :$(join(duplicate_names, ", "))"))
Expand All @@ -428,7 +428,7 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
data[:variable] = names(df)

# An array of Dicts for summary statistics
column_stats_dicts = map(columns(df)) do col
column_stats_dicts = map(eachcol(df)) do col
if eltype(col) >: Missing
t = collect(skipmissing(col))
d = get_stats(t, predefined_funs)
Expand All @@ -438,11 +438,11 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
get_stats!(d, col, custom_funs)
end

if :nmissing in predefined_funs
if :nmissing in predefined_funs
d[:nmissing] = eltype(col) >: Missing ? count(ismissing, col) : nothing
end

if :first in predefined_funs
if :first in predefined_funs
d[:first] = isempty(col) ? nothing : first(col)
end

Expand Down Expand Up @@ -509,7 +509,7 @@ function get_stats(col::AbstractVector, stats::AbstractVector{Symbol})
end

function get_stats!(d::Dict, col::AbstractVector, stats::AbstractVector{<:Pair})
for stat in stats
for stat in stats
d[stat[1]] = try stat[2](col) catch end
end
end
Expand Down Expand Up @@ -843,7 +843,7 @@ function Base.convert(::Type{Matrix{T}}, df::AbstractDataFrame) where T
n, p = size(df)
res = Matrix{T}(undef, n, p)
idx = 1
for (name, col) in zip(names(df), columns(df))
for (name, col) in eachcol(df, true)
try
copyto!(res, idx, col)
catch err
Expand Down
46 changes: 17 additions & 29 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,12 @@ struct DataFrameColumns{T<:AbstractDataFrame, V} <: AbstractVector{V}
end

"""
eachcol(df::AbstractDataFrame, names::Bool=true)
eachcol(df::AbstractDataFrame, names::Bool=false)

Return a `DataFrameColumns` that iterates an `AbstractDataFrame` column by column.
If `names` is equal to `true` (currently the default, in the future the default
will be set to `false`) iteration returns a pair consisting of column name
and column vector.
If `names` is equal to `false` then column vectors are yielded.
If `names` is equal to `false` (the default) iteration returns column vectors.
If `names` is equal to `true` pairs consisting of column name and column vector
are yielded.

**Examples**

Expand All @@ -120,48 +119,37 @@ julia> df = DataFrame(x=1:4, y=11:14)
│ 3 │ 3 │ 13 │
│ 4 │ 4 │ 14 │

julia> collect(eachcol(df, true))
2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
:x => [1, 2, 3, 4]
:y => [11, 12, 13, 14]

julia> collect(eachcol(df, false))
julia> collect(eachcol(df))
bkamins marked this conversation as resolved.
Show resolved Hide resolved
2-element Array{AbstractArray{T,1} where T,1}:
[1, 2, 3, 4]
[11, 12, 13, 14]

julia> sum.(eachcol(df, false))
2-element Array{Int64,1}:
10
50

julia> map(eachcol(df, false)) do col
julia> map(eachcol(df)) do col
maximum(col) - minimum(col)
end
2-element Array{Int64,1}:
3
3

julia> sum.(eachcol(df))
2-element Array{Int64,1}:
10
50

julia> collect(eachcol(df, true))
2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
:x => [1, 2, 3, 4]
:y => [11, 12, 13, 14]
```
"""
@inline function eachcol(df::T, names::Bool) where T<: AbstractDataFrame
@inline function eachcol(df::T, names::Bool=false) where T<: AbstractDataFrame
if names
DataFrameColumns{T, Pair{Symbol, AbstractVector}}(df)
else
DataFrameColumns{T, AbstractVector}(df)
end
end

# TODO: remove this method after deprecation
# and add default argument value above
function eachcol(df::AbstractDataFrame)
Base.depwarn("In the future eachcol will have names argument set to false by default", :eachcol)
eachcol(df, true)
end

# TODO: remove this method after deprecation
# this is left to make sure we do not forget to properly fix columns calls
columns(df::AbstractDataFrame) = eachcol(df, false)

Base.size(itr::DataFrameColumns) = (size(itr.df, 2),)
Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear()

Expand Down
8 changes: 4 additions & 4 deletions src/abstractdataframe/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol,
cols = Vector{AbstractVector}(undef, ncleft + ncol(dfr_noon))
# inner and left joins preserve non-missingness of the left frame
_similar_left = kind == :inner || kind == :left ? similar : similar_missing
for (i, col) in enumerate(columns(joiner.dfl))
for (i, col) in enumerate(eachcol(joiner.dfl))
cols[i] = _similar_left(col, nrow)
copyto!(cols[i], view(col, all_orig_left_ixs))
end
# inner and right joins preserve non-missingness of the right frame
_similar_right = kind == :inner || kind == :right ? similar : similar_missing
for (i, col) in enumerate(columns(dfr_noon))
for (i, col) in enumerate(eachcol(dfr_noon))
cols[i+ncleft] = _similar_right(col, nrow)
copyto!(cols[i+ncleft], view(col, all_orig_right_ixs))
permute!(cols[i+ncleft], right_perm)
Expand Down Expand Up @@ -407,7 +407,7 @@ end
function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame; makeunique::Bool=false)
r1, r2 = size(df1, 1), size(df2, 1)
colindex = merge(index(df1), index(df2), makeunique=makeunique)
cols = Any[[repeat(c, inner=r2) for c in columns(df1)];
[repeat(c, outer=r1) for c in columns(df2)]]
cols = Any[[repeat(c, inner=r2) for c in eachcol(df1)];
[repeat(c, outer=r1) for c in eachcol(df2)]]
DataFrame(cols, colindex)
end
10 changes: 5 additions & 5 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import Base: @deprecate

import Base: keys, values, insert!
@deprecate keys(df::AbstractDataFrame) names(df)
@deprecate values(df::AbstractDataFrame) columns(df)
@deprecate values(df::AbstractDataFrame) eachcol(df)
@deprecate insert!(df::DataFrame, df2::AbstractDataFrame) (foreach(col -> df[col] = df2[col], names(df2)); df)

@deprecate pool categorical
Expand Down Expand Up @@ -1328,11 +1328,11 @@ import Base: vcat
@deprecate showcols(df::AbstractDataFrame, all::Bool=false, values::Bool=true) describe(df, :eltype, :nmissing, :first, :last)
@deprecate showcols(io::IO, df::AbstractDataFrame, all::Bool=false, values::Bool=true) show(io, describe(df, :eltype, :nmissing, :first, :last), all)
function StatsBase.describe(df::AbstractDataFrame; stats=nothing)
if stats === nothing
_describe(df, [:mean, :min, :median,
:max, :nunique, :nmissing,
if stats === nothing
_describe(df, [:mean, :min, :median,
:max, :nunique, :nmissing,
:eltype])
elseif stats === :all
elseif stats === :all
Base.depwarn("The `stats` keyword argument has been deprecated. Use describe(df, stats...) instead.", :describe)
_describe(df, [:mean, :std, :min, :q25, :median, :q75,
:max, :nunique, :nmissing, :first, :last, :eltype])
Expand Down
4 changes: 2 additions & 2 deletions src/groupeddataframe/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ function _combine(f::Union{AbstractVector{<:Pair}, Tuple{Vararg{Pair}},
incols = gd.parent[first(p)]
else
df = gd.parent[collect(first(p))]
incols = NamedTuple{Tuple(names(df))}(columns(df))
incols = NamedTuple{Tuple(names(df))}(eachcol(df))
end
firstres = do_call(fun, gd, incols, 1)
idx, outcols, _ = _combine_with_first(wrap(firstres), fun, gd, incols)
Expand Down Expand Up @@ -715,7 +715,7 @@ function _combine(f::Any, gd::GroupedDataFrame)
fun = last(f)
elseif f isa Pair
df = gd.parent[collect(first(f))]
incols = NamedTuple{Tuple(names(df))}(columns(df))
incols = NamedTuple{Tuple(names(df))}(eachcol(df))
fun = last(f)
else
incols = nothing
Expand Down
15 changes: 7 additions & 8 deletions test/cat.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
module TestCat

using Test, Random, DataFrames
using DataFrames: columns
const ≅ = isequal

#
Expand Down Expand Up @@ -158,7 +157,7 @@ end
@testset "vcat >2 args" begin
empty_dfs = [DataFrame(), DataFrame(), DataFrame()]
@test vcat(empty_dfs...) == reduce(vcat, empty_dfs) == DataFrame()

df = DataFrame(x = trues(1), y = falses(1))
dfs = [df, df, df]
@test vcat(dfs...) ==reduce(vcat, dfs) == DataFrame(x = trues(3), y = falses(3))
Expand All @@ -167,13 +166,13 @@ end
@testset "vcat mixed coltypes" begin
df = vcat(DataFrame([[1]], [:x]), DataFrame([[1.0]], [:x]))
@test df == DataFrame([[1.0, 1.0]], [:x])
@test typeof.(columns(df)) == [Vector{Float64}]
@test typeof.(eachcol(df)) == [Vector{Float64}]
df = vcat(DataFrame([[1]], [:x]), DataFrame([["1"]], [:x]))
@test df == DataFrame([[1, "1"]], [:x])
@test typeof.(columns(df)) == [Vector{Any}]
@test typeof.(eachcol(df)) == [Vector{Any}]
df = vcat(DataFrame([Union{Missing, Int}[1]], [:x]), DataFrame([[1]], [:x]))
@test df == DataFrame([[1, 1]], [:x])
@test typeof.(columns(df)) == [Vector{Union{Missing, Int}}]
@test typeof.(eachcol(df)) == [Vector{Union{Missing, Int}}]
df = vcat(DataFrame([CategoricalArray([1])], [:x]), DataFrame([[1]], [:x]))
@test df == DataFrame([[1, 1]], [:x])
@test df[:x] isa Vector{Int}
Expand All @@ -188,14 +187,14 @@ end
df = vcat(DataFrame([Union{Int, Missing}[1]], [:x]),
DataFrame([["1"]], [:x]))
@test df == DataFrame([[1, "1"]], [:x])
@test typeof.(columns(df)) == [Vector{Any}]
@test typeof.(eachcol(df)) == [Vector{Any}]
df = vcat(DataFrame([CategoricalArray([1])], [:x]),
DataFrame([CategoricalArray(["1"])], [:x]))
@test df == DataFrame([[1, "1"]], [:x])
@test df[:x] isa CategoricalVector{Any}
df = vcat(DataFrame([trues(1)], [:x]), DataFrame([[false]], [:x]))
@test df == DataFrame([[true, false]], [:x])
@test typeof.(columns(df)) == [Vector{Bool}]
@test typeof.(eachcol(df)) == [Vector{Bool}]
end

@testset "vcat out of order" begin
Expand All @@ -213,7 +212,7 @@ end
@test vcat(df2, df1, df2) == DataFrame([[2, 4, 6, 7, 8, 9, 2, 4, 6],
[8, 10, 12, 4, 5, 6, 8, 10, 12],
[14, 16, 18, 1, 2, 3, 14, 16, 18]] ,[:C, :B, :A])

@test size(vcat(df1, df1, df1, df2, df2, df2)) == (18, 3)
df3 = df1[[1, 3, 2]]
res = vcat(df1, df1, df1, df2, df2, df2, df3, df3, df3, df3)
Expand Down
7 changes: 3 additions & 4 deletions test/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ module TestConstructors

using Test, DataFrames
using DataFrames: Index, _columns, index
using DataFrames: columns
const ≅ = isequal

#
Expand Down Expand Up @@ -147,13 +146,13 @@ end
@testset "column types" begin
df = DataFrame(A = 1:3, B = 2:4, C = 3:5)
answer = [Array{Int,1}, Array{Int,1}, Array{Int,1}]
@test map(typeof, columns(df)) == answer
@test map(typeof, eachcol(df)) == answer
df[:D] = [4, 5, missing]
push!(answer, Vector{Union{Int, Missing}})
@test map(typeof, columns(df)) == answer
@test map(typeof, eachcol(df)) == answer
df[:E] = 'c'
push!(answer, Vector{Char})
@test map(typeof, columns(df)) == answer
@test map(typeof, eachcol(df)) == answer
end

@testset "categorical constructor" begin
Expand Down
Loading