Skip to content

Commit

Permalink
add prepend! (#3068)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Jun 9, 2022
1 parent fec65bf commit a8af8d2
Show file tree
Hide file tree
Showing 4 changed files with 401 additions and 49 deletions.
1 change: 1 addition & 0 deletions docs/src/lib/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ invpermute!
mapcols
mapcols!
permute!
prepend!
push!
reduce
repeat
Expand Down
153 changes: 137 additions & 16 deletions src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1284,8 +1284,9 @@ columns that are aliases (equal when compared with `===`).
# See also
Use [`push!`](@ref) to add individual rows to a data frame and [`vcat`](@ref)
to vertically concatenate data frames.
Use [`push!`](@ref) to add individual rows to a data frame, [`prepend!`](@ref)
to add a table at the beginning, and [`vcat`](@ref) to vertically concatenate
data frames.
# Examples
```jldoctest
Expand Down Expand Up @@ -1322,8 +1323,98 @@ julia> df1
6 │ 6 6
```
"""
function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
                      promote::Bool=(cols in [:union, :subset]))
    # Appending is the `atend=true` case of the shared append/prepend routine;
    # both keyword arguments are forwarded unchanged.
    return _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=true)
end

# TODO: add a reference to pushfirst when it is added:
# [`pushfirst!`](@ref) to add individual rows at the beginning of a data frame,
"""
prepend!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
prepend!(df::DataFrame, table; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
Add the rows of `df2` to the beginning of `df`. If the second argument `table`
is not an `AbstractDataFrame` then it is converted using
`DataFrame(table, copycols=false)` before being prepended.
The exact behavior of `prepend!` depends on the `cols` argument:
* If `cols == :setequal` (this is the default)
then `df2` must contain exactly the same columns as `df` (but possibly in a
different order).
* If `cols == :orderequal` then `df2` must contain the same columns in the same
order (for `AbstractDict` this option requires that `keys(row)` matches
`propertynames(df)` to allow for support of ordered dicts; however, if `df2`
is a `Dict` an error is thrown as it is an unordered collection).
* If `cols == :intersect` then `df2` may contain more columns than `df`, but all
column names that are present in `df` must be present in `df2` and only these
are used.
* If `cols == :subset` then `prepend!` behaves like for `:intersect` but if some
column is missing in `df2` then a `missing` value is pushed to `df`.
* If `cols == :union` then `prepend!` adds columns missing in `df` that are present
in `df2`, for columns present in `df` but missing in `df2` a `missing` value
is pushed.
If `promote=true` and element type of a column present in `df` does not allow
the type of a pushed argument then a new column with a promoted element type
allowing it is freshly allocated and stored in `df`. If `promote=false` an error
is thrown.
The above rule has the following exceptions:
* If `df` has no columns then copies of columns from `df2` are added to it.
* If `df2` has no columns then calling `prepend!` leaves `df` unchanged.
Please note that `prepend!` must not be used on a `DataFrame` that contains
columns that are aliases (equal when compared with `===`).
# See also
Use
[`append!`](@ref) to add a table at the end, and [`vcat`](@ref)
to vertically concatenate data frames.
# Examples
```jldoctest
julia> df1 = DataFrame(A=1:3, B=1:3)
3×2 DataFrame
Row │ A B
│ Int64 Int64
─────┼──────────────
1 │ 1 1
2 │ 2 2
3 │ 3 3
julia> df2 = DataFrame(A=4.0:6.0, B=4:6)
3×2 DataFrame
Row │ A B
│ Float64 Int64
─────┼────────────────
1 │ 4.0 4
2 │ 5.0 5
3 │ 6.0 6
julia> prepend!(df1, df2);
julia> df1
6×2 DataFrame
Row │ A B
│ Int64 Int64
─────┼──────────────
1 │ 4 4
2 │ 5 5
3 │ 6 6
4 │ 1 1
5 │ 2 2
6 │ 3 3
```
"""
function Base.prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
                       promote::Bool=(cols in [:union, :subset]))
    # Prepending is the `atend=false` case of the shared append/prepend routine;
    # both keyword arguments are forwarded unchanged.
    return _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=false)
end

function _append_or_prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol,
promote::Bool, atend::Bool)
if !(cols in (:orderequal, :setequal, :intersect, :subset, :union))
throw(ArgumentError("`cols` keyword argument must be " *
":orderequal, :setequal, :intersect, :subset or :union)"))
Expand Down Expand Up @@ -1371,8 +1462,9 @@ function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:sete
end
end

nrows, ncols = size(df1)
targetrows = nrows + nrow(df2)
nrow1 = nrow(df1)
nrow2 = nrow(df2)
targetrows = nrow1 + nrow2
current_col = 0
# in the code below we use a direct access to _columns because
# we resize the columns so temporarily the `DataFrame` is internally
Expand All @@ -1387,24 +1479,42 @@ function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:sete
T = eltype(df1_c)
if S <: T || !promote || promote_type(S, T) <: T
# if S <: T || promote_type(S, T) <: T this should never throw an exception
append!(df1_c, df2_c)
if atend
append!(df1_c, df2_c)
else
prepend!(df1_c, df2_c)
end
else
newcol = similar(df1_c, promote_type(S, T), targetrows)
copyto!(newcol, 1, df1_c, 1, nrows)
copyto!(newcol, nrows+1, df2_c, 1, targetrows - nrows)
firstindex(newcol) != 1 && _onebased_check_error()
if atend
copyto!(newcol, 1, df1_c, 1, nrow1)
copyto!(newcol, nrow1+1, df2_c, 1, nrow2)
else
copyto!(newcol, 1, df2_c, 1, nrow2)
copyto!(newcol, nrow2+1, df1_c, 1, nrow1)
end
_columns(df1)[j] = newcol
end
else
if Missing <: eltype(df1[!, j])
resize!(df1[!, j], targetrows)
df1[nrows+1:targetrows, j] .= missing
if atend
resize!(df1[!, j], targetrows)
df1[nrow1+1:targetrows, j] .= missing
else
prepend!(df1[!, j], Iterators.repeated(missing, nrow2))
end
elseif promote
newcol = similar(df1[!, j], Union{Missing, eltype(df1[!, j])},
targetrows)
copyto!(newcol, 1, df1[!, j], 1, nrows)
newcol[nrows+1:targetrows] .= missing
firstindex(newcol) != 1 && _onebased_check_error()
if atend
copyto!(newcol, 1, df1[!, j], 1, nrow1)
newcol[nrow1+1:targetrows] .= missing
else
copyto!(newcol, nrow2+1, df1[!, j], 1, nrow1)
newcol[1:nrow2] .= missing
end
_columns(df1)[j] = newcol
else
throw(ArgumentError("promote=false and source data frame does " *
Expand All @@ -1422,15 +1532,26 @@ function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:sete
for n in setdiff(_names(df2), _names(df1))
newcol = similar(df2[!, n], Union{Missing, eltype(df2[!, n])},
targetrows)
@inbounds newcol[1:nrows] .= missing
copyto!(newcol, nrows+1, df2[!, n], 1, targetrows - nrows)
firstindex(newcol) != 1 && _onebased_check_error()
if atend
newcol[1:nrow1] .= missing
copyto!(newcol, nrow1+1, df2[!, n], 1, targetrows - nrow1)
else
newcol[nrow2+1:targetrows] .= missing
copyto!(newcol, 1, df2[!, n], 1, nrow2)
end
df1[!, n] = newcol
end
end
catch err
# Undo changes in case of error
for col in _columns(df1)
resize!(col, nrows)
@assert length(col) >= nrow1
if atend
resize!(col, nrow1)
elseif length(col) != nrow1
deleteat!(col, 1:length(col) - nrow1)
end
end
@error "Error adding value to column :$(_names(df1)[current_col])."
rethrow(err)
Expand Down
9 changes: 9 additions & 0 deletions src/other/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ function Base.append!(df::DataFrame, table; cols::Symbol=:setequal,
append!(df, DataFrame(table, copycols=false), cols=cols, promote=promote)
end

function Base.prepend!(df::DataFrame, table; cols::Symbol=:setequal,
                       promote::Bool=(cols in [:union, :subset]))
    # Guard: a `Dict` source combined with `cols=:orderequal` is rejected up front
    # with an `ArgumentError`, before any conversion is attempted.
    unordered_source = cols == :orderequal && table isa Dict
    unordered_source &&
        throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " *
                            "`:orderequal` is not allowed as it is unordered"))

    # Convert the table to a `DataFrame` (with `copycols=false`) and delegate to
    # the `prepend!` method that accepts a data frame as its second argument.
    converted = DataFrame(table, copycols=false)
    return prepend!(df, converted, cols=cols, promote=promote)
end

# This supports the Tables.RowTable type; needed to avoid ambiguities w/ another constructor
function DataFrame(x::AbstractVector{NamedTuple{names, T}};
                   copycols::Bool=true) where {names, T}
    # The `copycols` keyword is accepted here, but the call below always passes
    # `copycols=false` to `fromcolumns`.
    wrapped_rows = Tables.IteratorWrapper(x)
    column_table = Tables.columns(wrapped_rows)
    return fromcolumns(column_table, collect(names), copycols=false)
end
Expand Down
Loading

0 comments on commit a8af8d2

Please sign in to comment.