Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate length, nrow, and ncol on DataFrames in favor of size. Fixe… #1224

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 9 additions & 56 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ abstract type AbstractDataFrame end
##############################################################################

# index(df) => AbstractIndex
# nrow(df) => Int
# ncol(df) => Int
# size(df, 1) => Int
# size(df, 2) => Int
# getindex(...)
# setindex!(...) exclusive of methods that add new columns

Expand Down Expand Up @@ -203,19 +203,7 @@ eltypes(df)
"""
eltypes(df::AbstractDataFrame) = map!(eltype, Vector{Type}(size(df,2)), columns(df))

# `size(df)` returns the (rows, columns) tuple for a data frame.
Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df))
# Return the extent of `df` along dimension `i`:
#   i == 1 -> number of rows (nrow), i == 2 -> number of columns (ncol).
# A data frame has exactly two dimensions; any other `i` throws ArgumentError.
function Base.size(df::AbstractDataFrame, i::Integer)
if i == 1
nrow(df)
elseif i == 2
ncol(df)
else
throw(ArgumentError("DataFrames only have two dimensions"))
end
end

Base.length(df::AbstractDataFrame) = ncol(df)
Base.endof(df::AbstractDataFrame) = ncol(df)
Base.endof(df::AbstractDataFrame) = size(df, 2)

Base.ndims(::AbstractDataFrame) = 2

Expand Down Expand Up @@ -254,17 +242,17 @@ end

Base.haskey(df::AbstractDataFrame, key::Any) = haskey(index(df), key)
Base.get(df::AbstractDataFrame, key::Any, default::Any) = haskey(df, key) ? df[key] : default
Base.isempty(df::AbstractDataFrame) = ncol(df) == 0
Base.isempty(df::AbstractDataFrame) = size(df, 2) == 0

##############################################################################
##
## Description
##
##############################################################################

# `head`/`tail`: return the first/last `r` rows of `df` (default 6).
# Both clamp `r` to the actual row count, so short frames are returned whole.
# NOTE(review): each `r`-taking definition appears twice — the nrow-based and
# the size-based variants are old/new lines of a diff; if evaluated as-is the
# later method definition overwrites the earlier one, so behavior is identical.
head(df::AbstractDataFrame, r::Int) = df[1:min(r,nrow(df)), :]
head(df::AbstractDataFrame, r::Int) = df[1:min(r,size(df, 1)), :]
head(df::AbstractDataFrame) = head(df, 6)
tail(df::AbstractDataFrame, r::Int) = df[max(1,nrow(df)-r+1):nrow(df), :]
tail(df::AbstractDataFrame, r::Int) = df[max(1,size(df, 1)-r+1):size(df, 1), :]
tail(df::AbstractDataFrame) = tail(df, 6)

"""
Expand Down Expand Up @@ -323,7 +311,7 @@ dump(df)

"""
function Base.dump(io::IO, df::AbstractDataFrame, n::Int, indent)
println(io, typeof(df), " $(nrow(df)) observations of $(ncol(df)) variables")
println(io, typeof(df), " $(size(df, 1)) observations of $(size(df, 2)) variables")
if n > 0
for (name, col) in eachcol(df)
print(io, indent, " ", name, ": ")
Expand Down Expand Up @@ -568,7 +556,7 @@ function nonunique(df::AbstractDataFrame)
gslots = row_group_slots(df)[3]
# unique rows are the first encountered group representatives,
# nonunique are everything else
res = fill(true, nrow(df))
res = fill(true, size(df, 1))
@inbounds for g_row in gslots
(g_row > 0) && (res[g_row] = false)
end
Expand Down Expand Up @@ -637,7 +625,7 @@ function colmissing(df::AbstractDataFrame) # -> Vector{Int}
end

# Return a data frame containing every column of `df` whose index is NOT
# listed in `icols`; the original `df` is left untouched.
# NOTE(review): the two consecutive `newcols = ...` assignments are old/new
# diff residue; the second, size-based assignment is the one that takes
# effect, and both compute the same index set.
function without(df::AbstractDataFrame, icols::Vector{Int})
newcols = setdiff(1:ncol(df), icols)
newcols = setdiff(1:size(df, 2), icols)
df[newcols]
end
# Convenience method: drop the single column at index `i`.
without(df::AbstractDataFrame, i::Int) = without(df, [i])
Expand Down Expand Up @@ -767,38 +755,3 @@ function Base.hash(df::AbstractDataFrame)
end
return UInt(h)
end


## Documentation for methods defined elsewhere

"""
Number of rows or columns in an AbstractDataFrame

```julia
nrow(df::AbstractDataFrame)
ncol(df::AbstractDataFrame)
```

**Arguments**

* `df` : the AbstractDataFrame

**Result**

* `::Int` : the number of rows (`nrow`) or columns (`ncol`), respectively

See also [`size`](@ref).

NOTE: these functions may be deprecated in favor of `size`.

**Examples**

```julia
df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
size(df)
nrow(df)
ncol(df)
```

"""
# nrow, ncol
6 changes: 3 additions & 3 deletions src/abstractdataframe/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,13 @@ DataFrameStream(df::DataFrame) = DataFrameStream(Tuple(df.columns), string.(name
# DataFrame Data.Source implementation
function Data.schema(df::DataFrame)
return Data.Schema(Type[eltype(A) for A in df.columns],
string.(names(df)), length(df) == 0 ? 0 : length(df.columns[1]))
string.(names(df)), size(df, 1))
end

# 5-arg form of the Data.Source protocol: the stream is exhausted once the
# cursor has moved past the last row or the last column.
Data.isdone(source::DataFrame, row, col, rows, cols) = row > rows || col > cols
# 3-arg form of the Data.Source protocol: look up the dimensions of `source`
# and delegate to the 5-arg `Data.isdone` bounds check.
# Fix: the previous first return path referenced an undefined variable `df`
# (instead of `source`) when computing the row count, and it also made the
# corrected size-based lines below it unreachable dead code. Keep only the
# corrected body; the external interface is unchanged.
function Data.isdone(source::DataFrame, row, col)
rows, cols = size(source)
return Data.isdone(source, row, col, rows, cols)
end

Data.streamtype(::Type{DataFrame}, ::Type{Data.Column}) = true
Expand Down
32 changes: 16 additions & 16 deletions src/abstractdataframe/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol,

nrow = length(all_orig_left_ixs) + roil
@assert nrow == length(all_orig_right_ixs) + loil
ncleft = ncol(joiner.dfl)
cols = Vector{Any}(ncleft + ncol(dfr_noon))
ncleft = size(joiner.dfl, 2)
cols = Vector{Any}(ncleft + size(dfr_noon, 2))
_similar = kind == :inner ? similar : similar_nullable
for (i, col) in enumerate(columns(joiner.dfl))
cols[i] = _similar(col, nrow)
Expand Down Expand Up @@ -132,10 +132,10 @@ function update_row_maps!(left_table::AbstractDataFrame,
@inline update!(mask::Vector{Bool}, orig_ixs::AbstractArray) = (mask[orig_ixs] = false)

# iterate over left rows and compose the left<->right index map
right_dict_cols = ntuple(i -> right_dict.df[i], ncol(right_dict.df))
left_table_cols = ntuple(i -> left_table[i], ncol(left_table))
right_dict_cols = ntuple(i -> right_dict.df[i], size(right_dict.df, 2))
left_table_cols = ntuple(i -> left_table[i], size(left_table, 2))
next_join_ix = 1
for l_ix in 1:nrow(left_table)
for l_ix in 1:size(left_table, 1)
r_ixs = findrows(right_dict, left_table, right_dict_cols, left_table_cols, l_ix)
if isempty(r_ixs)
update!(leftonly_ixs, l_ix, next_join_ix)
Expand Down Expand Up @@ -164,16 +164,16 @@ function update_row_maps!(left_table::AbstractDataFrame,
map_left::Bool, map_leftonly::Bool,
map_right::Bool, map_rightonly::Bool)
init_map(df::AbstractDataFrame, init::Bool) = init ?
RowIndexMap(sizehint!(Vector{Int}(), nrow(df)),
sizehint!(Vector{Int}(), nrow(df))) : nothing
RowIndexMap(sizehint!(Vector{Int}(), size(df, 1)),
sizehint!(Vector{Int}(), size(df, 1))) : nothing
to_bimap(x::RowIndexMap) = x
to_bimap(::Void) = RowIndexMap(Vector{Int}(), Vector{Int}())

# init maps as requested
left_ixs = init_map(left_table, map_left)
leftonly_ixs = init_map(left_table, map_leftonly)
right_ixs = init_map(right_table, map_right)
rightonly_mask = map_rightonly ? fill(true, nrow(right_table)) : nothing
rightonly_mask = map_rightonly ? fill(true, size(right_table, 1)) : nothing
update_row_maps!(left_table, right_table, right_dict, left_ixs, leftonly_ixs, right_ixs, rightonly_mask)
if map_rightonly
rightonly_orig_ixs = find(rightonly_mask)
Expand Down Expand Up @@ -276,10 +276,10 @@ function Base.join(df1::AbstractDataFrame,
dfr_on_grp = group_rows(joiner.dfr_on)
# iterate over left rows and leave those found in right
left_ixs = Vector{Int}()
sizehint!(left_ixs, nrow(joiner.dfl))
dfr_on_grp_cols = ntuple(i -> dfr_on_grp.df[i], ncol(dfr_on_grp.df))
dfl_on_cols = ntuple(i -> joiner.dfl_on[i], ncol(joiner.dfl_on))
@inbounds for l_ix in 1:nrow(joiner.dfl_on)
sizehint!(left_ixs, size(joiner.dfl, 1))
dfr_on_grp_cols = ntuple(i -> dfr_on_grp.df[i], size(dfr_on_grp.df, 2))
dfl_on_cols = ntuple(i -> joiner.dfl_on[i], size(joiner.dfl_on, 2))
@inbounds for l_ix in 1:size(joiner.dfl_on, 1)
if findrow(dfr_on_grp, joiner.dfl_on, dfr_on_grp_cols, dfl_on_cols, l_ix) != 0
push!(left_ixs, l_ix)
end
Expand All @@ -290,10 +290,10 @@ function Base.join(df1::AbstractDataFrame,
dfr_on_grp = group_rows(joiner.dfr_on)
# iterate over left rows and leave those not found in right
leftonly_ixs = Vector{Int}()
sizehint!(leftonly_ixs, nrow(joiner.dfl))
dfr_on_grp_cols = ntuple(i -> dfr_on_grp.df[i], ncol(dfr_on_grp.df))
dfl_on_cols = ntuple(i -> joiner.dfl_on[i], ncol(joiner.dfl_on))
@inbounds for l_ix in 1:nrow(joiner.dfl_on)
sizehint!(leftonly_ixs, size(joiner.dfl, 1))
dfr_on_grp_cols = ntuple(i -> dfr_on_grp.df[i], size(dfr_on_grp.df, 2))
dfl_on_cols = ntuple(i -> joiner.dfl_on[i], size(joiner.dfl_on, 2))
@inbounds for l_ix in 1:size(joiner.dfl_on, 1)
if findrow(dfr_on_grp, joiner.dfl_on, dfr_on_grp_cols, dfl_on_cols, l_ix) == 0
push!(leftonly_ixs, l_ix)
end
Expand Down
16 changes: 8 additions & 8 deletions src/abstractdataframe/reshape.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ function stack(df::AbstractDataFrame, measure_vars::Vector{Int},
cnames = names(df)[id_vars]
insert!(cnames, 1, value_name)
insert!(cnames, 1, variable_name)
DataFrame(Any[repeat(_names(df)[measure_vars], inner=nrow(df)), # variable
DataFrame(Any[repeat(_names(df)[measure_vars], inner=size(df, 1)), # variable
vcat([df[c] for c in measure_vars]...), # value
[repeat(df[c], outer=N) for c in id_vars]...], # id_var columns
cnames)
Expand Down Expand Up @@ -114,7 +114,7 @@ numeric_vars(df::AbstractDataFrame) =
function stack(df::AbstractDataFrame, measure_vars = numeric_vars(df);
variable_name::Symbol=:variable, value_name::Symbol=:value)
mv_inds = index(df)[measure_vars]
stack(df, mv_inds, setdiff(1:ncol(df), mv_inds);
stack(df, mv_inds, setdiff(1:size(df, 2), mv_inds);
variable_name=variable_name, value_name=value_name)
end

Expand All @@ -129,7 +129,7 @@ end
function melt(df::AbstractDataFrame, id_vars;
variable_name::Symbol=:variable, value_name::Symbol=:value)
id_inds = index(df)[id_vars]
stack(df, setdiff(1:ncol(df), id_inds), id_inds;
stack(df, setdiff(1:size(df, 2), id_inds), id_inds;
variable_name=variable_name, value_name=value_name)
end
function melt(df::AbstractDataFrame, id_vars, measure_vars;
Expand Down Expand Up @@ -200,7 +200,7 @@ function unstack(df::AbstractDataFrame, rowkey::Int, colkey::Int, value::Int)
Ncol = length(keycol.pool)
payload = DataFrame(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol)))
nowarning = true
for k in 1:nrow(df)
for k in 1:size(df, 1)
j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
i = Int(CategoricalArrays.order(refkeycol.pool)[refkeycol.refs[k]])
if i > 0 && j > 0
Expand Down Expand Up @@ -237,7 +237,7 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int)
Ncol = length(levels(keycol))
df2 = DataFrame(Any[similar_nullable(valuecol, Nrow) for i in 1:Ncol], map(Symbol, levels(keycol)))
nowarning = true
for k in 1:nrow(df)
for k in 1:size(df, 1)
j = Int(CategoricalArrays.order(keycol.pool)[keycol.refs[k]])
i = rowkey[k]
if i > 0 && j > 0
Expand Down Expand Up @@ -451,7 +451,7 @@ function stackdf(df::AbstractDataFrame, measure_vars::Vector{Int},
cnames = names(df)[id_vars]
insert!(cnames, 1, value_name)
insert!(cnames, 1, variable_name)
DataFrame(Any[RepeatedVector(_names(df)[measure_vars], nrow(df), 1), # variable
DataFrame(Any[RepeatedVector(_names(df)[measure_vars], size(df, 1), 1), # variable
StackedVector(Any[df[:,c] for c in measure_vars]), # value
[RepeatedVector(df[:,c], 1, N) for c in id_vars]...], # id_var columns
cnames)
Expand Down Expand Up @@ -479,7 +479,7 @@ end
function stackdf(df::AbstractDataFrame, measure_vars = numeric_vars(df);
variable_name::Symbol=:variable, value_name::Symbol=:value)
m_inds = index(df)[measure_vars]
stackdf(df, m_inds, setdiff(1:ncol(df), m_inds);
stackdf(df, m_inds, setdiff(1:size(df, 2), m_inds);
variable_name=variable_name, value_name=value_name)
end

Expand All @@ -489,7 +489,7 @@ A stacked view of a DataFrame (long format); see `stackdf`
function meltdf(df::AbstractDataFrame, id_vars; variable_name::Symbol=:variable,
value_name::Symbol=:value)
id_inds = index(df)[id_vars]
stackdf(df, setdiff(1:ncol(df), id_inds), id_inds;
stackdf(df, setdiff(1:size(df, 2), id_inds), id_inds;
variable_name=variable_name, value_name=value_name)
end
function meltdf(df::AbstractDataFrame, id_vars, measure_vars;
Expand Down
4 changes: 2 additions & 2 deletions src/abstractdataframe/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ immutable DFPerm{O<:Union{Ordering, AbstractVector}, DF<:AbstractDataFrame} <: O
end

function DFPerm{O<:Ordering, DF<:AbstractDataFrame}(ords::AbstractVector{O}, df::DF)
if length(ords) != ncol(df)
if length(ords) != size(df, 2)
error("DFPerm: number of column orderings does not equal the number of DataFrame columns")
end
DFPerm{typeof(ords), DF}(ords, df)
Expand All @@ -77,7 +77,7 @@ Base.@propagate_inbounds Base.getindex(o::DFPerm, i::Int, j::Int) = o.df[i, j]
Base.@propagate_inbounds Base.getindex(o::DFPerm, a::DataFrameRow, j::Int) = a[j]

function Sort.lt(o::DFPerm, a, b)
@inbounds for i = 1:ncol(o.df)
@inbounds for i = 1:size(o.df, 2)
ord = col_ordering(o, i)
va = o[a, i]
vb = o[b, i]
Expand Down
Loading