From 0469c8bed7d58707bb17eb7a37662e3e273e6786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 6 Jun 2022 12:38:31 +0200 Subject: [PATCH 1/5] add prepend! --- src/dataframe/dataframe.jl | 68 +++++++-- src/other/tables.jl | 9 ++ test/dataframe.jl | 287 ++++++++++++++++++++++++++++++++----- 3 files changed, 317 insertions(+), 47 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index d09dbfd497..c785a4bb73 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1322,8 +1322,16 @@ julia> df1 6 │ 6 6 ``` """ -function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) +Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) = + _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=true) + +Base.prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) = + _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=false) + +function _append_or_prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol, + promote::Bool, atend::Bool) if !(cols in (:orderequal, :setequal, :intersect, :subset, :union)) throw(ArgumentError("`cols` keyword argument must be " * ":orderequal, :setequal, :intersect, :subset or :union)")) @@ -1371,8 +1379,9 @@ function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:sete end end - nrows, ncols = size(df1) - targetrows = nrows + nrow(df2) + nrow1 = nrow(df1) + nrow2 = nrow(df2) + targetrows = nrow1 + nrow2 current_col = 0 # in the code below we use a direct access to _columns because # we resize the columns so temporarily the `DataFrame` is internally @@ -1387,24 +1396,42 @@ function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:sete T = eltype(df1_c) if S <: T || !promote || promote_type(S, T) <: T # if S <: T 
|| promote_type(S, T) <: T this should never throw an exception - append!(df1_c, df2_c) + if atend + append!(df1_c, df2_c) + else + prepend!(df1_c, df2_c) + end else newcol = similar(df1_c, promote_type(S, T), targetrows) - copyto!(newcol, 1, df1_c, 1, nrows) - copyto!(newcol, nrows+1, df2_c, 1, targetrows - nrows) firstindex(newcol) != 1 && _onebased_check_error() + if atend + copyto!(newcol, 1, df1_c, 1, nrow1) + copyto!(newcol, nrow1+1, df2_c, 1, nrow2) + else + copyto!(newcol, 1, df2_c, 1, nrow2) + copyto!(newcol, nrow2+1, df1_c, 1, nrow1) + end _columns(df1)[j] = newcol end else if Missing <: eltype(df1[!, j]) - resize!(df1[!, j], targetrows) - df1[nrows+1:targetrows, j] .= missing + if atend + resize!(df1[!, j], targetrows) + df1[nrow1+1:targetrows, j] .= missing + else + prepend!(df1[!, j], Iterators.repeated(missing, nrow2)) + end elseif promote newcol = similar(df1[!, j], Union{Missing, eltype(df1[!, j])}, targetrows) - copyto!(newcol, 1, df1[!, j], 1, nrows) - newcol[nrows+1:targetrows] .= missing firstindex(newcol) != 1 && _onebased_check_error() + if atend + copyto!(newcol, 1, df1[!, j], 1, nrow1) + newcol[nrow1+1:targetrows] .= missing + else + copyto!(newcol, nrow2+1, df1[!, j], 1, nrow1) + newcol[1:nrow2] .= missing + end _columns(df1)[j] = newcol else throw(ArgumentError("promote=false and source data frame does " * @@ -1422,15 +1449,28 @@ function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:sete for n in setdiff(_names(df2), _names(df1)) newcol = similar(df2[!, n], Union{Missing, eltype(df2[!, n])}, targetrows) - @inbounds newcol[1:nrows] .= missing - copyto!(newcol, nrows+1, df2[!, n], 1, targetrows - nrows) + firstindex(newcol) != 1 && _onebased_check_error() + if atend + newcol[1:nrow1] .= missing + copyto!(newcol, nrow1+1, df2[!, n], 1, targetrows - nrow1) + else + newcol[nrow2+1:targetrows] .= missing + copyto!(newcol, 1, df2[!, n], 1, nrow2) + end df1[!, n] = newcol end end catch err # Undo changes in case of error for 
col in _columns(df1) - resize!(col, nrows) + @assert length(col) >= nrow1 + if atend + resize!(col, nrow1) + else + if length(col) != nrow1 + deleteat!(col, 1:length(col) - nrow1) + end + end end @error "Error adding value to column :$(_names(df1)[current_col])." rethrow(err) diff --git a/src/other/tables.jl b/src/other/tables.jl index 6fac44a242..09cf1f634d 100644 --- a/src/other/tables.jl +++ b/src/other/tables.jl @@ -69,6 +69,15 @@ function Base.append!(df::DataFrame, table; cols::Symbol=:setequal, append!(df, DataFrame(table, copycols=false), cols=cols, promote=promote) end +function Base.prepend!(df::DataFrame, table; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + if table isa Dict && cols == :orderequal + throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " * + "`:orderequal` is not allowed as it is unordered")) + end + prepend!(df, DataFrame(table, copycols=false), cols=cols, promote=promote) +end + # This supports the Tables.RowTable type; needed to avoid ambiguities w/ another constructor DataFrame(x::AbstractVector{NamedTuple{names, T}}; copycols::Bool=true) where {names, T} = fromcolumns(Tables.columns(Tables.IteratorWrapper(x)), collect(names), copycols=false) diff --git a/test/dataframe.jl b/test/dataframe.jl index 0e76e4574e..6cb341a097 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -596,6 +596,44 @@ end @test_throws ArgumentError push!(df, "ab") end +@testset "extra push! 
tests" begin + for df in [DataFrame(a=Any[1]), DataFrame(a=1)] + @test push!(df, (b=1,), cols=:union) ≅ + DataFrame(a=[1, missing], b=[missing, 1]) + @test push!(df, (b=1,), cols=:union) ≅ + DataFrame(a=[1, missing, missing], b=[missing, 1, 1]) + df.x = 1:3 + with_logger(SimpleLogger(IOBuffer())) do + @test_throws MethodError push!(df, (b=1,), cols=:union, promote=false) + end + @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) + allowmissing!(df, :x) + @test push!(df, (b=1,), cols=:union, promote=false) ≅ + DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 1, 1], + x=[1:3; missing]) + end + + for df in [DataFrame(a=Any[1]), DataFrame(a=1)] + @test push!(df, DataFrame(b=1)[1, :], cols=:union) ≅ + DataFrame(a=[1, missing], b=[missing, 1]) + @test push!(df, DataFrame(b=1)[1, :], cols=:union) ≅ + DataFrame(a=[1, missing, missing], b=[missing, 1, 1]) + df.x = 1:3 + with_logger(SimpleLogger(IOBuffer())) do + @test_throws MethodError push!(df, DataFrame(b=1)[1, :], cols=:union, + promote=false) + end + @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) + allowmissing!(df, :x) + @test push!(df, DataFrame(b=1)[1, :], cols=:union, promote=false) ≅ + DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 1, 1], + x=[1:3; missing]) + end + + @test_throws ArgumentError push!(DataFrame(), (a=1, b=2), cols=:unions) + @test_throws ArgumentError push!(DataFrame(), Dict('a'=>1, 'b'=>2), cols=:union) +end + @testset "deleteat!" begin df = DataFrame(a=[1, 2], b=[3.0, 4.0]) @test_throws BoundsError deleteat!(df, [true, true, true]) @@ -836,6 +874,77 @@ end @test df == DataFrame(A=1:2, B=1:2) end +@testset "prepend!" 
begin + buf = IOBuffer() + sl = SimpleLogger(buf) + df = DataFrame(A=1:2, B=1:2) + df2 = DataFrame(A=[3, 4, 1, 2], B=[3, 4, 1, 2]) + @test prepend!(df, DataFrame(A=3:4, B=[3.0, 4.0])) == df2 + with_logger(sl) do + @test_throws InexactError prepend!(df, DataFrame(A=3:4, B=[3.5, 4.5])) + end + @test df == df2 + @test occursin("Error adding value to column :B", String(take!(buf))) + with_logger(sl) do + @test_throws MethodError prepend!(df, DataFrame(A=3:4, B=["a", "b"])) + end + @test df == df2 + @test occursin("Error adding value to column :B", String(take!(buf))) + @test_throws ArgumentError prepend!(df, DataFrame(A=1:4, C=1:4)) + @test df == df2 + + dfx = DataFrame() + df3 = prepend!(dfx, df) + @test dfx === df3 + @test df3 == df + @test df3[!, 1] !== df[!, 1] + @test df3[!, 2] !== df[!, 2] + + df4 = prepend!(df3, DataFrame()) + @test df4 === df3 + @test df4 == df + + df = DataFrame() + df.a = [1, 2, 3] + df.b = df.a + dfc = copy(df) + with_logger(sl) do + @test_throws AssertionError prepend!(df, dfc) + end + @test df == dfc + @test occursin("Error adding value to column :a", String(take!(buf))) + + df = DataFrame() + df.a = [1, 2, 3, 4] + df.b = df.a + df.c = [1, 2, 3, 4] + dfc = copy(df) + with_logger(sl) do + @test_throws AssertionError prepend!(df, dfc) + end + @test df == dfc + @test occursin("Error adding value to column :a", String(take!(buf))) + + rename!(df, [:a, :b, :z]) + @test_throws ArgumentError prepend!(df, dfc) + + df = DataFrame(A=1:2, B=1:2) + df2 = DataFrame(A=[3, 4, 1, 2], B=[3, 4, 1, 2]) + @test prepend!(copy(df), DataFrame(A=3:4, B=[3.0, 4.0])) == df2 + @test prepend!(copy(df), DataFrame(A=3:4, B=[3.0, 4.0]), cols=:setequal) == df2 + @test prepend!(copy(df), DataFrame(B=3:4, A=[3.0, 4.0])) == df2 + @test prepend!(copy(df), DataFrame(B=3:4, A=[3.0, 4.0]), cols=:setequal) == df2 + @test prepend!(copy(df), Dict(:A => 3:4, :B => [3.0, 4.0])) == df2 + @test prepend!(copy(df), Dict(:A => 3:4, :B => [3.0, 4.0]), cols=:setequal) == df2 + @test 
prepend!(copy(df), DataFrame(A=3:4, B=[3.0, 4.0]), cols=:orderequal) == df2 + @test prepend!(copy(df), OrderedDict(:A => 3:4, :B => [3.0, 4.0]), cols=:orderequal) == df2 + @test_throws ArgumentError prepend!(df, Dict(:A => 3:4, :B => [3.0, 4.0]), cols=:orderequal) + @test_throws ArgumentError prepend!(df, DataFrame(B=3:4, A=[3.0, 4.0]), cols=:orderequal) + @test_throws ArgumentError prepend!(df, OrderedDict(:B => 3:4, :A => [3.0, 4.0]), cols=:orderequal) + @test_throws ArgumentError prepend!(df, DataFrame(B=3:4, A=[3.0, 4.0]), cols=:xxx) + @test df == DataFrame(A=1:2, B=1:2) +end + @testset "append! default options" begin buf = IOBuffer() sl = SimpleLogger(buf) @@ -867,7 +976,42 @@ end df1 = DataFrame(x=1:3, y=1:3) df2 = DataFrame(y=4:6) append!(df1, df2, cols=cols) - @test df1 ≅ DataFrame(x=[1:3;missing; missing; missing], y=1:6) + @test df1 ≅ DataFrame(x=[1:3; missing; missing; missing], y=1:6) + end +end + +@testset "prepend! default options" begin + buf = IOBuffer() + sl = SimpleLogger(buf) + + df1 = DataFrame(x=1:3, y=1:3) + df2 = DataFrame(y=4:6, x=1:3) + prepend!(df1, df2) + @test df1 == DataFrame(x=[1:3;1:3], y=[4:6; 1:3]) + + df2 = DataFrame(y=4:6, x=1:3, z=1) + @test_throws ArgumentError prepend!(df1, df2) + @test df1 == DataFrame(x=[1:3;1:3], y=[4:6; 1:3]) + + df2 = DataFrame(y=4:6, x=[missing, missing, missing]) + with_logger(sl) do + @test_throws MethodError prepend!(df1, df2) + end + @test df1 == DataFrame(x=[1:3;1:3], y=[4:6; 1:3]) + + df2 = DataFrame(x=[missing, missing, missing], y=4:6) + for cols in (:orderequal, :intersect) + with_logger(sl) do + @test_throws MethodError prepend!(df1, df2, cols=cols) + end + @test df1 == DataFrame(x=[1:3;1:3], y=[4:6; 1:3]) + end + + for cols in (:subset, :union) + df1 = DataFrame(x=1:3, y=1:3) + df2 = DataFrame(y=4:6) + prepend!(df1, df2, cols=cols) + @test df1 ≅ DataFrame(x=[missing; missing; missing; 1:3], y=[4:6; 1:3]) end end @@ -966,61 +1110,138 @@ end end end end -end -@testset "new append! and push! 
tests" begin for df in [DataFrame(a=Any[1]), DataFrame(a=1)] @test append!(df, DataFrame(b=1), cols=:union) ≅ DataFrame(a=[1, missing], b=[missing, 1]) - @test append!(df, DataFrame(b=1), cols=:union) ≅ - DataFrame(a=[1, missing, missing], b=[missing, 1, 1]) + @test append!(df, DataFrame(b=2), cols=:union) ≅ + DataFrame(a=[1, missing, missing], b=[missing, 1, 2]) df.x = 1:3 with_logger(SimpleLogger(IOBuffer())) do @test_throws ArgumentError append!(df, DataFrame(b=1), cols=:union, promote=false) end - @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) + @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 2], x=1:3) allowmissing!(df, :x) - @test append!(df, DataFrame(b=1), cols=:union, promote=false) ≅ - DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 1, 1], + @test append!(df, DataFrame(b=3), cols=:union, promote=false) ≅ + DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 2, 3], x=[1:3; missing]) end +end - for df in [DataFrame(a=Any[1]), DataFrame(a=1)] - @test push!(df, (b=1,), cols=:union) ≅ - DataFrame(a=[1, missing], b=[missing, 1]) - @test push!(df, (b=1,), cols=:union) ≅ - DataFrame(a=[1, missing, missing], b=[missing, 1, 1]) - df.x = 1:3 - with_logger(SimpleLogger(IOBuffer())) do - @test_throws MethodError push!(df, (b=1,), cols=:union, promote=false) +@testset "prepend! 
advanced options" begin + buf = IOBuffer() + sl = SimpleLogger(buf) + + for cols in (:orderequal, :setequal, :intersect, :subset, :union) + for promote in (true, false) + df1 = DataFrame(x=1:3, y=1:3) + df2 = DataFrame(x=1:3, y=4:6) + prepend!(df1, df2, cols=cols, promote=promote) + @test df1 == DataFrame(x=[1:3;1:3], y=[4:6; 1:3]) + @test eltype(df1.x) == Int + @test eltype(df1.y) == Int + + df1 = DataFrame(x=1:3, y=1:3) + df2 = DataFrame(y=4:6, x=1:3) + if cols == :orderequal + @test_throws ArgumentError prepend!(df1, df2, cols=cols, promote=promote) + @test df1 == DataFrame(x=1:3, y=1:3) + else + prepend!(df1, df2, cols=cols, promote=promote) + @test df1 == DataFrame(x=[1:3;1:3], y=[4:6; 1:3]) + @test eltype(df1.x) == Int + @test eltype(df1.y) == Int + end + + df1 = DataFrame() + df1.x = 1:3 + df1.y = df1.x + df2 = DataFrame(x=1:3, y=4:6) + with_logger(sl) do + @test_throws AssertionError prepend!(df1, df2, cols=cols, promote=promote) + end + @test df1 == DataFrame(x=1:3, y=1:3) + df2 = DataFrame(y=4:6, x=1:3) + with_logger(sl) do + @test_throws (cols == :orderequal ? 
ArgumentError : + AssertionError) prepend!(df1, df2, cols=cols, promote=promote) + end + @test df1 == DataFrame(x=1:3, y=1:3) + + df1 = DataFrame(x=1:3, y=1:3) + df2 = DataFrame(x=1:3, y=4:6, z=11:13) + if cols in [:orderequal, :setequal] + @test_throws ArgumentError prepend!(df1, df2, cols=cols, promote=promote) + @test df1 == DataFrame(x=1:3, y=1:3) + elseif cols == :union + prepend!(df1, df2, cols=cols, promote=promote) + @test df1 ≅ DataFrame(x=[1:3;1:3], y=[4:6; 1:3], + z=[11:13; missing; missing; missing]) + @test eltype(df1.x) == Int + @test eltype(df1.y) == Int + @test eltype(df1.z) == Union{Missing, Int} + else + prepend!(df1, df2, cols=cols, promote=promote) + @test df1 == DataFrame(x=[1:3;1:3], y=[4:6; 1:3]) + @test eltype(df1.x) == Int + @test eltype(df1.y) == Int + end + + df1 = DataFrame(x=1:3, y=1:3) + df2 = DataFrame(x=1:3, y=[missing, missing, missing]) + if promote + prepend!(df1, df2, cols=cols, promote=true) + @test df1 ≅ DataFrame(x=[1:3;1:3], y=[missing; missing; missing; 1:3]) + @test eltype(df1.x) == Int + @test eltype(df1.y) == Union{Missing, Int} + else + with_logger(sl) do + @test_throws MethodError prepend!(df1, df2, cols=cols, promote=promote) + end + @test df1 == DataFrame(x=1:3, y=1:3) + end + + df1 = DataFrame(x=1:3, y=1:3) + df2 = DataFrame(x=1:3, z=11:13) + if !promote || cols in [:orderequal, :setequal, :intersect] + with_logger(sl) do + @test_throws ArgumentError prepend!(df1, df2, cols=cols, promote=promote) + end + @test df1 == DataFrame(x=1:3, y=1:3) + elseif cols == :union + prepend!(df1, df2, cols=cols, promote=true) + @test df1 ≅ DataFrame(x=[1:3;1:3], y=[missing; missing; missing; 1:3], + z=[11:13; missing; missing; missing]) + @test eltype(df1.x) == Int + @test eltype(df1.y) == Union{Missing, Int} + @test eltype(df1.z) == Union{Missing, Int} + else + prepend!(df1, df2, cols=cols, promote=true) + @test df1 ≅ DataFrame(x=[1:3;1:3], y=[missing; missing; missing; 1:3]) + @test eltype(df1.x) == Int + @test eltype(df1.y) == 
Union{Missing, Int} + end end - @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) - allowmissing!(df, :x) - @test push!(df, (b=1,), cols=:union, promote=false) ≅ - DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 1, 1], - x=[1:3; missing]) end for df in [DataFrame(a=Any[1]), DataFrame(a=1)] - @test push!(df, DataFrame(b=1)[1, :], cols=:union) ≅ - DataFrame(a=[1, missing], b=[missing, 1]) - @test push!(df, DataFrame(b=1)[1, :], cols=:union) ≅ - DataFrame(a=[1, missing, missing], b=[missing, 1, 1]) + @test prepend!(df, DataFrame(b=1), cols=:union) ≅ + DataFrame(a=[missing, 1], b=[1, missing]) + @test prepend!(df, DataFrame(b=2), cols=:union) ≅ + DataFrame(a=[missing, missing, 1], b=[2, 1, missing]) df.x = 1:3 with_logger(SimpleLogger(IOBuffer())) do - @test_throws MethodError push!(df, DataFrame(b=1)[1, :], cols=:union, - promote=false) + @test_throws ArgumentError prepend!(df, DataFrame(b=1), cols=:union, + promote=false) end - @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) + @test df ≅ DataFrame(a=[missing, missing, 1], b=[2, 1, missing], x=1:3) allowmissing!(df, :x) - @test push!(df, DataFrame(b=1)[1, :], cols=:union, promote=false) ≅ - DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 1, 1], - x=[1:3; missing]) + @test prepend!(df, DataFrame(b=3), cols=:union, promote=false) ≅ + DataFrame(a=[missing, missing, missing, 1], b=[3, 2, 1, missing], + x=[missing; 1:3]) end - @test_throws ArgumentError push!(DataFrame(), (a=1, b=2), cols=:unions) - @test_throws ArgumentError push!(DataFrame(), Dict('a'=>1, 'b'=>2), cols=:union) end @testset "rename" begin From 5186267cb8fe352cb4379c693b5e67de13cec94c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 7 Jun 2022 09:14:14 +0200 Subject: [PATCH 2/5] add docstring --- src/dataframe/dataframe.jl | 84 +++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/src/dataframe/dataframe.jl 
b/src/dataframe/dataframe.jl index c785a4bb73..72f0910c3a 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1284,8 +1284,9 @@ columns that are aliases (equal when compared with `===`). # See also -Use [`push!`](@ref) to add individual rows to a data frame and [`vcat`](@ref) -to vertically concatenate data frames. +Use [`prepend!`](@ref) to add a table at the beginning, [`push!`](@ref) to add +individual rows to a data frame and [`vcat`](@ref) to vertically concatenate +data frames. # Examples ```jldoctest @@ -1326,6 +1327,85 @@ Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) = _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=true) +""" + prepend!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + prepend!(df::DataFrame, table; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + +Add the rows of `df2` to the beginning of `df`. If the second argument `table` +is not an `AbstractDataFrame` then it is converted using +`DataFrame(table, copycols=false)` before being prepended. + +The exact behavior of `prepend!` depends on the `cols` argument: +* If `cols == :setequal` (this is the default) + then `df2` must contain exactly the same columns as `df` (but possibly in a + different order). +* If `cols == :orderequal` then `df2` must contain the same columns in the same + order (for `AbstractDict` this option requires that `keys(row)` matches + `propertynames(df)` to allow for support of ordered dicts; however, if `df2` + is a `Dict` an error is thrown as it is an unordered collection). +* If `cols == :intersect` then `df2` may contain more columns than `df`, but all + column names that are present in `df` must be present in `df2` and only these + are used. 
+* If `cols == :subset` then `prepend!` behaves like for `:intersect` but if some + column is missing in `df2` then a `missing` value is pushed to `df`. +* If `cols == :union` then `prepend!` adds columns missing in `df` that are present + in `df2`, for columns present in `df` but missing in `df2` a `missing` value + is pushed. + +If `promote=true` and element type of a column present in `df` does not allow +the type of a pushed argument then a new column with a promoted element type +allowing it is freshly allocated and stored in `df`. If `promote=false` an error +is thrown. + +The above rule has the following exceptions: +* If `df` has no columns then copies of columns from `df2` are added to it. +* If `df2` has no columns then calling `prepend!` leaves `df` unchanged. + +Please note that `prepend!` must not be used on a `DataFrame` that contains +columns that are aliases (equal when compared with `===`). + +# See also + +Use [`append!`](@ref) to add a table at an end and [`vcat`](@ref) +to vertically concatenate data frames. 
+ +# Examples +```jldoctest +julia> df1 = DataFrame(A=1:3, B=1:3) +3×2 DataFrame + Row │ A B + │ Int64 Int64 +─────┼────────────── + 1 │ 1 1 + 2 │ 2 2 + 3 │ 3 3 + +julia> df2 = DataFrame(A=4.0:6.0, B=4:6) +3×2 DataFrame + Row │ A B + │ Float64 Int64 +─────┼──────────────── + 1 │ 4.0 4 + 2 │ 5.0 5 + 3 │ 6.0 6 + +julia> prepend!(df1, df2); + +julia> df1 +6×2 DataFrame + Row │ A B + │ Int64 Int64 +─────┼────────────── + 1 │ 4 4 + 2 │ 5 5 + 3 │ 6 6 + 4 │ 1 1 + 5 │ 2 2 + 6 │ 3 3 +``` +""" Base.prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) = _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=false) From 37768f3560b4dd39c523cc5c8bf5d6256945c8aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 7 Jun 2022 09:14:38 +0200 Subject: [PATCH 3/5] add manual reference --- docs/src/lib/functions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index a8af39a8f3..9ddb16c8e8 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -81,6 +81,7 @@ invpermute! mapcols mapcols! permute! +prepend! push! reduce repeat From 106d6673291b6f5321017af505104e42467e4f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 8 Jun 2022 22:40:37 +0200 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/dataframe/dataframe.jl | 13 ++++++------- test/dataframe.jl | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 72f0910c3a..bd5d468a39 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1284,8 +1284,8 @@ columns that are aliases (equal when compared with `===`). 
# See also -Use [`prepend!`](@ref) to add a table at the beginning, [`push!`](@ref) to add -individual rows to a data frame and [`vcat`](@ref) to vertically concatenate +Use [`push!`](@ref) to add individual rows to a data frame, [`prepend!`](@ref) +to add a table at the beginning, and [`vcat`](@ref) to vertically concatenate data frames. # Examples @@ -1368,7 +1368,8 @@ columns that are aliases (equal when compared with `===`). # See also -Use [`append!`](@ref) to add a table at an end and [`vcat`](@ref) +Use [`pushfirst!`](@ref) to add individual rows at the beginning of a data frame, +[`append!`](@ref) to add a table at the end, and [`vcat`](@ref) to vertically concatenate data frames. # Examples @@ -1546,10 +1547,8 @@ function _append_or_prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbo @assert length(col) >= nrow1 if atend resize!(col, nrow1) - else - if length(col) != nrow1 - deleteat!(col, 1:length(col) - nrow1) - end + elseif length(col) != nrow1 + deleteat!(col, 1:length(col) - nrow1) end end @error "Error adding value to column :$(_names(df1)[current_col])." 
diff --git a/test/dataframe.jl b/test/dataframe.jl index 6cb341a097..c4730e4e60 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -606,7 +606,7 @@ end with_logger(SimpleLogger(IOBuffer())) do @test_throws MethodError push!(df, (b=1,), cols=:union, promote=false) end - @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) + @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) allowmissing!(df, :x) @test push!(df, (b=1,), cols=:union, promote=false) ≅ DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 1, 1], @@ -623,7 +623,7 @@ end @test_throws MethodError push!(df, DataFrame(b=1)[1, :], cols=:union, promote=false) end - @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) + @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) allowmissing!(df, :x) @test push!(df, DataFrame(b=1)[1, :], cols=:union, promote=false) ≅ DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 1, 1], From 63ba1654d9964468c8318bd9b971056a9e224125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 8 Jun 2022 23:24:19 +0200 Subject: [PATCH 5/5] fix docs --- src/dataframe/dataframe.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index bd5d468a39..3f3ef3dc26 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1327,6 +1327,8 @@ Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) = _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=true) +# TODO: add a reference to pushfirst when it is added: +# [`pushfirst!`](@ref) to add individual rows at the beginning of a data frame, """ prepend!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) @@ -1368,7 +1370,7 @@ columns that are aliases (equal when compared with `===`). 
# See also -Use [`pushfirst!`](@ref) to add individual rows at the beginning of a data frame, +Use [`append!`](@ref) to add a table at the end, and [`vcat`](@ref) to vertically concatenate data frames.