From 58454baed7c54f4502f76d891fe3d8d6aee87d2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 2 Sep 2021 14:54:14 +0200 Subject: [PATCH 1/4] deprecate delete!, define deleteat! and keepat! --- NEWS.md | 8 ++ docs/src/lib/functions.md | 3 +- docs/src/man/split_apply_combine.md | 2 +- src/DataFrames.jl | 8 ++ src/abstractdataframe/abstractdataframe.jl | 16 +-- src/abstractdataframe/subset.jl | 4 +- src/dataframe/dataframe.jl | 47 +++++++-- src/deprecated.jl | 6 +- src/groupeddataframe/groupeddataframe.jl | 2 +- src/other/precompile.jl | 4 +- src/subdataframe/subdataframe.jl | 5 +- test/data.jl | 6 +- test/dataframe.jl | 114 +++++++++++++++++---- test/subdataframe.jl | 5 +- 14 files changed, 180 insertions(+), 50 deletions(-) diff --git a/NEWS.md b/NEWS.md index 318e64dc99..86c8f37fa4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -47,6 +47,7 @@ filled with `missing` values. If `SubDataFrame` was not created with `:` as column selector the resulting operation must produce the same column names as stored in the source `SubDataFrame` or an error is thrown. + * `Tables.materializer` when passed the following types or their subtypes: `AbstractDataFrame`, `DataFrameRows`, `DataFrameColumns` returns `DataFrame`. ([#2839](https://github.com/JuliaData/DataFrames.jl/pull/2839)) @@ -54,12 +55,19 @@ (with `false` default) that specifies if columns should be inserted after or before `col`. ([#2829](https://github.com/JuliaData/DataFrames.jl/pull/2829)) +* Added support fro `deleteat!` and `keepat!` + ([#XXXX](https://github.com/JuliaData/DataFrames.jl/issues/XXXX)) ## Bug fixes * fix a problem with `unstack` on empty data frame ([#2842](https://github.com/JuliaData/DataFrames.jl/issues/2842)) +## Deprecations + +* `delete!` is deprecated in favor of `deleteat!` + ([#XXXX](https://github.com/JuliaData/DataFrames.jl/issues/XXXX)) + # DataFrames.jl v1.2.2 Patch Release Notes ## Bug fixes diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index fb415a5cd4..42bc539215 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -127,12 +127,13 @@ valuecols ## Filtering rows ```@docs -delete! +deleteat! empty empty! filter filter! first +keepat! last only nonunique diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md index cd003397ed..6475883672 100644 --- a/docs/src/man/split_apply_combine.md +++ b/docs/src/man/split_apply_combine.md @@ -571,7 +571,7 @@ julia> push!(df, [3]) 3 │ 3 julia> gd[1] -ERROR: AssertionError: The current number of rows in the parent data frame is 3 and it does not match the number of rows it contained when GroupedDataFrame was created which was 2. The number of rows in the parent data frame has likely been changed unintentionally (e.g. using subset!, filter!, delete!, push!, or append! functions). +ERROR: AssertionError: The current number of rows in the parent data frame is 3 and it does not match the number of rows it contained when GroupedDataFrame was created which was 2. The number of rows in the parent data frame has likely been changed unintentionally (e.g. using subset!, filter!, deleteat!, keepat!, push!, or append! functions). ``` Sometimes it is useful to append rows to the source data frame of a diff --git a/src/DataFrames.jl b/src/DataFrames.jl index ca52ed392e..16d4215fd6 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -96,6 +96,14 @@ else export only end +if isdefined(Base, :keepat!) # Introduced in 1.7.0 + import Base.keepat! +else + # uncomment when https://github.com/JuliaLang/Compat.jl/issues/750 is resolved + # import Compat.keepat! + export keepat! +end + if VERSION >= v"1.3" using Base.Threads: @spawn else diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 597803d595..cb158309fc 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -937,7 +937,7 @@ function dropmissing!(df::AbstractDataFrame, disallowmissing::Bool=true) inds = completecases(df, cols) inds .= .!(inds) - delete!(df, inds) + deleteat!(df, inds) disallowmissing && disallowmissing!(df, cols) df end @@ -1124,7 +1124,7 @@ julia> filter!(AsTable(:) => nt -> nt.x == 1 || nt.y == "b", df) 3 │ 1 b ``` """ -Base.filter!(f, df::AbstractDataFrame) = delete!(df, findall(!f, eachrow(df))) +Base.filter!(f, df::AbstractDataFrame) = deleteat!(df, findall(!f, eachrow(df))) Base.filter!((col, f)::Pair{<:ColumnIndex}, df::AbstractDataFrame) = _filter!_helper(df, f, df[!, col]) Base.filter!((cols, f)::Pair{<:AbstractVector{Symbol}}, df::AbstractDataFrame) = @@ -1142,20 +1142,20 @@ function _filter!_helper(df::AbstractDataFrame, f, cols...) else rowidxs = findall(((x...) -> !(f(x...)::Bool)).(cols...)) end - return delete!(df, rowidxs) + return deleteat!(df, rowidxs) end function Base.filter!((cols, f)::Pair{<:AsTable}, df::AbstractDataFrame) dff = select(df, cols.cols, copycols=false) if ncol(dff) == 0 - return delete!(df, findall(x -> !f(NamedTuple()), axes(df, 1))) + return deleteat!(df, findall(x -> !f(NamedTuple()), axes(df, 1))) else return _filter!_helper_astable(df, Tables.namedtupleiterator(dff), f) end end _filter!_helper_astable(df::AbstractDataFrame, nti::Tables.NamedTupleIterator, f) = - delete!(df, _findall((x -> !(f(x)::Bool)).(nti))) + deleteat!(df, _findall((x -> !(f(x)::Bool)).(nti))) function Base.Matrix(df::AbstractDataFrame) T = reduce(promote_type, (eltype(v) for v in eachcol(df))) @@ -1267,11 +1267,11 @@ end nonunique(df::AbstractDataFrame, cols) = nonunique(select(df, cols, copycols=false)) -Base.unique!(df::AbstractDataFrame) = delete!(df, _findall(nonunique(df))) +Base.unique!(df::AbstractDataFrame) = deleteat!(df, _findall(nonunique(df))) Base.unique!(df::AbstractDataFrame, cols::AbstractVector) = - delete!(df, _findall(nonunique(df, cols))) + deleteat!(df, _findall(nonunique(df, cols))) Base.unique!(df::AbstractDataFrame, cols) = - delete!(df, _findall(nonunique(df, cols))) + deleteat!(df, _findall(nonunique(df, cols))) # Unique rows of an AbstractDataFrame. @inline function Base.unique(df::AbstractDataFrame; view::Bool=false) diff --git a/src/abstractdataframe/subset.jl b/src/abstractdataframe/subset.jl index 131fd8eeff..29e72b8fa0 100644 --- a/src/abstractdataframe/subset.jl +++ b/src/abstractdataframe/subset.jl @@ -289,7 +289,7 @@ julia> df """ function subset!(df::AbstractDataFrame, @nospecialize(args...); skipmissing::Bool=false) row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing) - return delete!(df, findall(!, row_selector)) + return deleteat!(df, findall(!, row_selector)) end function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Bool=false, @@ -299,7 +299,7 @@ function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Boo lazy_lock = gdf.lazy_lock row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing) df = parent(gdf) - res = delete!(df, findall(!, row_selector)) + res = deleteat!(df, findall(!, row_selector)) if nrow(res) == length(groups) # we have not removed any rows return ungroup ? res : gdf end diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 574f1059ad..1727d0d356 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -987,7 +987,7 @@ function Base.copy(df::DataFrame; copycols::Bool=true) end """ - delete!(df::DataFrame, inds) + deleteat!(df::DataFrame, inds) Delete rows specified by `inds` from a `DataFrame` `df` in place and return it. @@ -1005,7 +1005,7 @@ julia> df = DataFrame(a=1:3, b=4:6) 2 │ 2 5 3 │ 3 6 -julia> delete!(df, 2) +julia> deleteat!(df, 2) 2×2 DataFrame Row │ a b │ Int64 Int64 @@ -1013,9 +1013,8 @@ julia> delete!(df, 2) 1 │ 1 4 2 │ 3 6 ``` - """ -function Base.delete!(df::DataFrame, inds) +function Base.deleteat!(df::DataFrame, inds) if !isempty(inds) && size(df, 2) == 0 throw(BoundsError(df, (inds, :))) end @@ -1027,10 +1026,10 @@ function Base.delete!(df::DataFrame, inds) # we require ind to be stored and unique like in Base # otherwise an error will be thrown and the data frame will get corrupted - return _delete!_helper(df, inds) + return _deleteat!_helper(df, inds) end -function Base.delete!(df::DataFrame, inds::AbstractVector{Bool}) +function Base.deleteat!(df::DataFrame, inds::AbstractVector{Bool}) if length(inds) != size(df, 1) throw(BoundsError(df, (inds, :))) end @@ -1039,12 +1038,12 @@ function Base.delete!(df::DataFrame, inds::AbstractVector{Bool}) if VERSION <= v"1.6.2" && drop isa UnitRange{<:Integer} drop = collect(drop) end - return _delete!_helper(df, drop) + return _deleteat!_helper(df, drop) end -Base.delete!(df::DataFrame, inds::Not) = delete!(df, axes(df, 1)[inds]) +Base.deleteat!(df::DataFrame, inds::Not) = deleteat!(df, axes(df, 1)[inds]) -function _delete!_helper(df::DataFrame, drop) +function _deleteat!_helper(df::DataFrame, drop) cols = _columns(df) isempty(cols) && return df @@ -1068,6 +1067,36 @@ function _delete!_helper(df::DataFrame, drop) return df end +""" + keepat!(df::DataFrame, inds) + +Keep only rows specified by `inds` in a `DataFrame` `df` in place and return it. + +Internally [`deleteat!`](@ref) is called with `Not(inds)` selector. In consequence, +as opposed to `[`deleteat!`](@ref)`, if the passed `inds` is a collection of +integer indices does not have to be sorted and may contain duplicates. + +# Examples +```jldoctest +julia> df = DataFrame(a=1:3, b=4:6) +3×2 DataFrame + Row │ a b + │ Int64 Int64 +─────┼────────────── + 1 │ 1 4 + 2 │ 2 5 + 3 │ 3 6 + +julia> keepat!(df, 2) +1×2 DataFrame + Row │ a b + │ Int64 Int64 +─────┼────────────── + 1 │ 2 5 +``` +""" +keepat!(df::DataFrame, inds) = deleteat!(df, Not(inds)) + """ empty!(df::DataFrame) diff --git a/src/deprecated.jl b/src/deprecated.jl index 47d5585515..19839c54a6 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,7 +1,11 @@ export by, aggregate +# TODO: remove definitions in 2.0 release by(args...; kwargs...) = throw(ArgumentError("by function was removed from DataFrames.jl. " * "Use the `combine(groupby(...), ...)` or `combine(f, groupby(...))` instead.")) - aggregate(args...; kwargs...) = throw(ArgumentError("aggregate function was removed from DataFrames.jl. " * "Use the `combine` function instead.")) + +# TODO: remove deprecation in 2.0 release +import Base.delete! +@deprecate delete!(df::DataFrame, inds) deleteat!(df::DataFrame, inds) \ No newline at end of file diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl index 6a3a74f51c..387e17240d 100644 --- a/src/groupeddataframe/groupeddataframe.jl +++ b/src/groupeddataframe/groupeddataframe.jl @@ -263,7 +263,7 @@ corrupt_msg(gd::GroupedDataFrame) = "rows it contained when GroupedDataFrame was created which was " * "$(length(getfield(gd, :groups))). The number of rows in the parent " * "data frame has likely been changed unintentionally " * - "(e.g. using subset!, filter!, delete!, push!, or append! functions)." + "(e.g. using subset!, filter!, deleteat!, keepat!, push!, or append! functions)." function Base.getproperty(gd::GroupedDataFrame, f::Symbol) @assert length(getfield(gd, :groups)) == nrow(getfield(gd, :parent)) corrupt_msg(gd) diff --git a/src/other/precompile.jl b/src/other/precompile.jl index 9d9d07c9eb..c37672b181 100644 --- a/src/other/precompile.jl +++ b/src/other/precompile.jl @@ -533,7 +533,7 @@ function precompile(all=false) Base.precompile(Tuple{typeof(do_call),typeof(maximum),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Matrix{Float64}}},Int}) Base.precompile(Tuple{typeof(getindex),GroupedDataFrame{DataFrame},Vector{Tuple{Any, Int}}}) Base.precompile(Tuple{typeof(show),IOContext{IOBuffer},DataFrameRow{DataFrame, Index}}) - Base.precompile(Tuple{typeof(delete!),DataFrame,InvertedIndex{Vector{Bool}}}) + Base.precompile(Tuple{typeof(deleteat!),DataFrame,InvertedIndex{Vector{Bool}}}) Base.precompile(Tuple{Core.kwftype(typeof(leftjoin)),NamedTuple{(:on, :makeunique, :validate, :renamecols), Tuple{Vector{Any}, Bool, Pair{Bool, Bool}, Pair{Symbol, String}}},typeof(leftjoin),DataFrame,DataFrame}) Base.precompile(Tuple{typeof(groupreduce!),Vector{Union{Missing, ComplexF64}},Function,Function,Nothing,Nothing,Bool,Vector{Union{Missing, ComplexF64}},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols), Tuple{Bool, Bool, Bool}},typeof(manipulate),SubDataFrame{DataFrame, SubIndex{Index, Vector{Int}, Vector{Int}}, Vector{Int}},Base.OneTo{Int}}) @@ -1446,7 +1446,7 @@ function precompile(all=false) Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, Int}}, Vector{Int}},Tuple{IntegerRefpool{Union{Missing, Int}}, IntegerRefpool{Int}},Tuple{IntegerRefarray{Vector{Union{Missing, Int}}}, IntegerRefarray{Vector{Int}}},Val{false},Vector{Int},Bool,Bool}) Base.precompile(Tuple{Reduce{typeof(Base.add_sum), Nothing, Nothing},Vector{Union{Missing, Int, Int8}},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(innerjoin)),NamedTuple{(:on,), Tuple{Vector{Pair{Symbol, Symbol}}}},typeof(innerjoin),DataFrame,DataFrame}) - Base.precompile(Tuple{typeof(delete!),DataFrame,InvertedIndex{InvertedIndices.TupleVector{Tuple{Int, Int}}}}) + Base.precompile(Tuple{typeof(deleteat!),DataFrame,InvertedIndex{InvertedIndices.TupleVector{Tuple{Int, Int}}}}) Base.precompile(Tuple{typeof(groupreduce!),Vector{Union{Missing, Int}},Function,Function,Nothing,Nothing,Bool,Vector{Union{Missing, Bool}},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(_semijoin_sorted),Vector{String},Vector{Union{Missing, String}},BitVector}) Base.precompile(Tuple{Core.kwftype(typeof(hcat)),NamedTuple{(:makeunique,), Tuple{Bool}},typeof(hcat),SubDataFrame{DataFrame, Index, Vector{Int}},SubDataFrame{DataFrame, Index, Vector{Int}}}) diff --git a/src/subdataframe/subdataframe.jl b/src/subdataframe/subdataframe.jl index 02e2ba6541..d9ca7854bf 100644 --- a/src/subdataframe/subdataframe.jl +++ b/src/subdataframe/subdataframe.jl @@ -290,7 +290,10 @@ Base.setproperty!(::SubDataFrame, col_ind::AbstractString, v::Any) = Base.copy(sdf::SubDataFrame) = parent(sdf)[rows(sdf), parentcols(index(sdf), :)] -Base.delete!(df::SubDataFrame, ind) = +Base.deleteat!(df::SubDataFrame, ind) = + throw(ArgumentError("SubDataFrame does not support deleting rows")) + +keepat!(df::SubDataFrame, ind) = throw(ArgumentError("SubDataFrame does not support deleting rows")) function DataFrame(sdf::SubDataFrame; copycols::Bool=true) diff --git a/test/data.jl b/test/data.jl index 977efdd39b..5d00e874f5 100644 --- a/test/data.jl +++ b/test/data.jl @@ -192,14 +192,14 @@ end @test eltype(dropmissing!(df).b) == Int end -@testset "delete! https://github.com/JuliaLang/julia/pull/41646 bug workaround" begin +@testset "deleteat! https://github.com/JuliaLang/julia/pull/41646 bug workaround" begin # these tests will crash Julia if they are not correct df = DataFrame(a= Vector{Union{Bool,Missing}}(missing, 10^4)); - delete!(df, 2:(nrow(df) - 5)) + deleteat!(df, 2:(nrow(df) - 5)) @test nrow(df) == 6 df = DataFrame(a= Vector{Union{Bool,Missing}}(missing, 10^4)); - delete!(df, [false; trues(nrow(df) - 6); falses(5)]) + deleteat!(df, [false; trues(nrow(df) - 6); falses(5)]) @test nrow(df) == 6 end diff --git a/test/dataframe.jl b/test/dataframe.jl index a00e432650..3e1f1e1263 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -582,74 +582,150 @@ end @test_throws ArgumentError push!(df, "ab") end -@testset "delete!" begin +@testset "deleteat!" begin df = DataFrame(a=[1, 2], b=[3.0, 4.0]) - @test_throws BoundsError delete!(df, [true, true, true]) - @test delete!(df, 1) === df + @test_throws BoundsError deleteat!(df, [true, true, true]) + @test deleteat!(df, 1) === df @test df == DataFrame(a=[2], b=[4.0]) df = DataFrame(a=[1, 2], b=[3.0, 4.0]) - @test delete!(df, 2) === df + @test deleteat!(df, 2) === df @test df == DataFrame(a=[1], b=[3.0]) df = DataFrame(a=Union{Int, Missing}[1, 2], b=Union{Float64, Missing}[3.0, 4.0]) - @test delete!(df, 1) === df + @test deleteat!(df, 1) === df @test df == DataFrame(a=[2], b=[4.0]) df = DataFrame(a=Union{Int, Missing}[1, 2], b=Union{Float64, Missing}[3.0, 4.0]) - @test delete!(df, 2) === df + @test deleteat!(df, 2) === df @test df == DataFrame(a=[1], b=[3.0]) for v in (2:3, [2, 3]) df = DataFrame(a=Union{Int, Missing}[1, 2, 3], b=Union{Float64, Missing}[3.0, 4.0, 5.0]) - @test delete!(df, v) === df + @test deleteat!(df, v) === df @test df == DataFrame(a=[1], b=[3.0]) df = DataFrame(a=[1, 2, 3], b=[3.0, 4.0, 5.0]) - @test delete!(df, v) === df + @test deleteat!(df, v) === df @test df == DataFrame(a=[1], b=[3.0]) end df = DataFrame() - @test_throws BoundsError delete!(df, 10) - @test_throws BoundsError delete!(df, [10]) + @test_throws BoundsError deleteat!(df, 10) + @test_throws BoundsError deleteat!(df, [10]) df = DataFrame(a=[]) - @test_throws BoundsError delete!(df, 10) + @test_throws BoundsError deleteat!(df, 10) # the exception type changed between Julia 1.0.2 and Julia 1.1 # so we use their supertype below - @test_throws Exception delete!(df, [10]) + @test_throws Exception deleteat!(df, [10]) df = DataFrame(a=[1, 2, 3], b=[3, 2, 1]) - @test_throws ArgumentError delete!(df, [3, 2]) - @test_throws ArgumentError delete!(df, [2, 2]) - @test delete!(df, [false, true, false]) === df + @test_throws ArgumentError deleteat!(df, [3, 2]) + @test_throws ArgumentError deleteat!(df, [2, 2]) + @test deleteat!(df, [false, true, false]) === df @test df == DataFrame(a=[1, 3], b=[3, 1]) for v in (1, [1], 1:1, [true, false, false]) x = [1, 2, 3] df = DataFrame(x=x) - @test delete!(df, v) == DataFrame(x=[2, 3]) + @test deleteat!(df, v) == DataFrame(x=[2, 3]) @test x == [1, 2, 3] end for v in (1, [1], 1:1, [true, false, false], Not(2, 3), Not([false, true, true])) x = [1, 2, 3] df = DataFrame(x=x, copycols=false) - @test delete!(df, v) == DataFrame(x=[2, 3]) + @test deleteat!(df, v) == DataFrame(x=[2, 3]) @test x == [2, 3] end for inds in (1, [1], [true, false]) df = DataFrame(x1=[1, 2]) df.x2 = df.x1 - @test delete!(df, inds) === df + @test deleteat!(df, inds) === df @test df == DataFrame(x1=[2], x2=[2]) end df = DataFrame(a=1, b=2) push!(df.b, 3) - @test_throws AssertionError delete!(df, 1) + @test_throws AssertionError deleteat!(df, 1) +end + +@testset "keepat!" begin + df = DataFrame(a=[1, 2], b=[3.0, 4.0]) + @test_throws BoundsError keepat!(df, [true, true, true]) + @test keepat!(df, 1) === df + @test df == DataFrame(a=[1], b=[3.0]) + + df = DataFrame(a=[1, 2], b=[3.0, 4.0]) + @test keepat!(df, [2]) === df + @test df == DataFrame(a=[2], b=[4.0]) + + df = DataFrame(a=Union{Int, Missing}[1, 2], b=Union{Float64, Missing}[3.0, 4.0]) + @test keepat!(df, 2) === df + @test df == DataFrame(a=[2], b=[4.0]) + + df = DataFrame(a=Union{Int, Missing}[1, 2], b=Union{Float64, Missing}[3.0, 4.0]) + @test keepat!(df, [1]) === df + @test df == DataFrame(a=[1], b=[3.0]) + + for v in (2:3, [2, 3]) + df = DataFrame(a=Union{Int, Missing}[1, 2, 3], b=Union{Float64, Missing}[3.0, 4.0, 5.0]) + @test keepat!(df, v) === df + @test df == DataFrame(a=[2, 3], b=[4.0, 5.0]) + + df = DataFrame(a=[1, 2, 3], b=[3.0, 4.0, 5.0]) + @test keepat!(df, v) === df + @test df == DataFrame(a=[2, 3], b=[4.0, 5.0]) + end + + df = DataFrame() + @test_throws BoundsError keepat!(df, 10) + @test_throws BoundsError keepat!(df, [10]) + @test_throws BoundsError keepat!(df, 0) + @test_throws BoundsError keepat!(df, [0]) + + df = DataFrame(a=[]) + @test_throws BoundsError keepat!(df, 10) + @test_throws BoundsError keepat!(df, 0) + # the exception type changed between Julia 1.0.2 and Julia 1.1 + # so we use their supertype below + @test_throws Exception keepat!(df, [10]) + @test_throws Exception keepat!(df, [0]) + + df = DataFrame(a=[1, 2, 3], b=[3, 2, 1]) + @test keepat!(df, [3, 2]) == DataFrame(a=[2, 3], b=[2, 1]) + df = DataFrame(a=[1, 2, 3], b=[3, 2, 1]) + @test keepat!(df, [2, 2]) == DataFrame(a=2, b=2) + df = DataFrame(a=[1, 2, 3], b=[3, 2, 1]) + @test keepat!(df, [true, false, true]) === df + @test df == DataFrame(a=[1, 3], b=[3, 1]) + + for v in (1, [1], 1:1, [true, false, false]) + x = [1, 2, 3] + df = DataFrame(x=x) + @test keepat!(df, v) == DataFrame(x=[1]) + @test x == [1, 2, 3] + end + + for v in (1, [1], 1:1, [true, false, false], Not(2, 3), Not([false, true, true])) + x = [1, 2, 3] + df = DataFrame(x=x, copycols=false) + @test keepat!(df, v) == DataFrame(x=[1]) + @test x == [1] + end + + for inds in (1, [1], [true, false]) + df = DataFrame(x1=[1, 2]) + df.x2 = df.x1 + @test keepat!(df, inds) === df + @test df == DataFrame(x1=[1], x2=[1]) + end + + df = DataFrame(a=1, b=2) + push!(df.b, 3) + @test_throws AssertionError keepat!(df, 1) end @testset "describe" begin diff --git a/test/subdataframe.jl b/test/subdataframe.jl index edd1d1bf06..5ffa919e60 100644 --- a/test/subdataframe.jl +++ b/test/subdataframe.jl @@ -222,10 +222,11 @@ end @test names(df) == names(x)[[4, 2]] end -@testset "delete!" begin +@testset "deleteat! and keepat!" begin y = 1.0:10.0 df = view(DataFrame(y=y), 2:6, :) - @test_throws ArgumentError delete!(df, 1) + @test_throws ArgumentError deleteat!(df, 1) + @test_throws ArgumentError keepat!(df, 1) end @testset "parent" begin From 9b214df6a3be9a39a67138004c5ef27dc737f641 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 14 Nov 2021 11:17:47 +0100 Subject: [PATCH 2/4] remove keepat! --- NEWS.md | 2 +- docs/src/lib/functions.md | 1 - docs/src/man/split_apply_combine.md | 2 +- src/DataFrames.jl | 8 --- src/dataframe/dataframe.jl | 33 +--------- src/groupeddataframe/groupeddataframe.jl | 2 +- src/subdataframe/subdataframe.jl | 3 - test/dataframe.jl | 76 ------------------------ test/subdataframe.jl | 3 +- 9 files changed, 6 insertions(+), 124 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3696599111..5cc7884005 100644 --- a/NEWS.md +++ b/NEWS.md @@ -61,7 +61,7 @@ (with `false` default) that specifies if columns should be inserted after or before `col`. ([#2829](https://github.com/JuliaData/DataFrames.jl/pull/2829)) -* Added support fro `deleteat!` and `keepat!` +* Added support for `deleteat!` ([#2854](https://github.com/JuliaData/DataFrames.jl/issues/2854)) * `leftjoin!` performing a left join of two data frame objects by updating the left data frame with the joined columns from right data frame. diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index de70ca936e..67f31f5c9b 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -134,7 +134,6 @@ empty! filter filter! first -keepat! last only nonunique diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md index 7976d352d1..bd8a639bd1 100644 --- a/docs/src/man/split_apply_combine.md +++ b/docs/src/man/split_apply_combine.md @@ -581,7 +581,7 @@ julia> push!(df, [3]) 3 │ 3 julia> gd[1] -ERROR: AssertionError: The current number of rows in the parent data frame is 3 and it does not match the number of rows it contained when GroupedDataFrame was created which was 2. The number of rows in the parent data frame has likely been changed unintentionally (e.g. using subset!, filter!, deleteat!, keepat!, push!, or append! functions). +ERROR: AssertionError: The current number of rows in the parent data frame is 3 and it does not match the number of rows it contained when GroupedDataFrame was created which was 2. The number of rows in the parent data frame has likely been changed unintentionally (e.g. using subset!, filter!, deleteat!, push!, or append! functions). ``` Sometimes it is useful to append rows to the source data frame of a diff --git a/src/DataFrames.jl b/src/DataFrames.jl index d6cd41a7b5..262759ac15 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -99,14 +99,6 @@ else export only end -if isdefined(Base, :keepat!) # Introduced in 1.7.0 - import Base.keepat! -else - # uncomment when https://github.com/JuliaLang/Compat.jl/issues/750 is resolved - # import Compat.keepat! - export keepat! -end - if VERSION >= v"1.3" using Base.Threads: @spawn else diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 00aa29649f..6e48be423e 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1004,7 +1004,8 @@ end Delete rows specified by `inds` from a `DataFrame` `df` in place and return it. Internally `deleteat!` is called for all columns so `inds` must be: -a vector of sorted and unique integers, a boolean vector, an integer, or `Not`. +a vector of sorted and unique integers, a boolean vector, an integer. +Additionally `Not` any valid selector is accepted. # Examples ```jldoctest @@ -1079,36 +1080,6 @@ function _deleteat!_helper(df::DataFrame, drop) return df end -""" - keepat!(df::DataFrame, inds) - -Keep only rows specified by `inds` in a `DataFrame` `df` in place and return it. - -Internally [`deleteat!`](@ref) is called with `Not(inds)` selector. In consequence, -as opposed to `[`deleteat!`](@ref)`, if the passed `inds` is a collection of -integer indices does not have to be sorted and may contain duplicates. - -# Examples -```jldoctest -julia> df = DataFrame(a=1:3, b=4:6) -3×2 DataFrame - Row │ a b - │ Int64 Int64 -─────┼────────────── - 1 │ 1 4 - 2 │ 2 5 - 3 │ 3 6 - -julia> keepat!(df, 2) -1×2 DataFrame - Row │ a b - │ Int64 Int64 -─────┼────────────── - 1 │ 2 5 -``` -""" -keepat!(df::DataFrame, inds) = deleteat!(df, Not(inds)) - """ empty!(df::DataFrame) diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl index b384121d0e..cac0a52b47 100644 --- a/src/groupeddataframe/groupeddataframe.jl +++ b/src/groupeddataframe/groupeddataframe.jl @@ -263,7 +263,7 @@ corrupt_msg(gd::GroupedDataFrame) = "rows it contained when GroupedDataFrame was created which was " * "$(length(getfield(gd, :groups))). The number of rows in the parent " * "data frame has likely been changed unintentionally " * - "(e.g. using subset!, filter!, deleteat!, keepat!, push!, or append! functions)." + "(e.g. using subset!, filter!, deleteat!, push!, or append! functions)." function Base.getproperty(gd::GroupedDataFrame, f::Symbol) @assert length(getfield(gd, :groups)) == nrow(getfield(gd, :parent)) corrupt_msg(gd) diff --git a/src/subdataframe/subdataframe.jl b/src/subdataframe/subdataframe.jl index d9ca7854bf..77e8ae484f 100644 --- a/src/subdataframe/subdataframe.jl +++ b/src/subdataframe/subdataframe.jl @@ -293,9 +293,6 @@ Base.copy(sdf::SubDataFrame) = parent(sdf)[rows(sdf), parentcols(index(sdf), :)] Base.deleteat!(df::SubDataFrame, ind) = throw(ArgumentError("SubDataFrame does not support deleting rows")) -keepat!(df::SubDataFrame, ind) = - throw(ArgumentError("SubDataFrame does not support deleting rows")) - function DataFrame(sdf::SubDataFrame; copycols::Bool=true) if copycols sdf[:, :] diff --git a/test/dataframe.jl b/test/dataframe.jl index 545941d909..566faa5218 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -652,82 +652,6 @@ end @test_throws AssertionError deleteat!(df, 1) end -@testset "keepat!" begin - df = DataFrame(a=[1, 2], b=[3.0, 4.0]) - @test_throws BoundsError keepat!(df, [true, true, true]) - @test keepat!(df, 1) === df - @test df == DataFrame(a=[1], b=[3.0]) - - df = DataFrame(a=[1, 2], b=[3.0, 4.0]) - @test keepat!(df, [2]) === df - @test df == DataFrame(a=[2], b=[4.0]) - - df = DataFrame(a=Union{Int, Missing}[1, 2], b=Union{Float64, Missing}[3.0, 4.0]) - @test keepat!(df, 2) === df - @test df == DataFrame(a=[2], b=[4.0]) - - df = DataFrame(a=Union{Int, Missing}[1, 2], b=Union{Float64, Missing}[3.0, 4.0]) - @test keepat!(df, [1]) === df - @test df == DataFrame(a=[1], b=[3.0]) - - for v in (2:3, [2, 3]) - df = DataFrame(a=Union{Int, Missing}[1, 2, 3], b=Union{Float64, Missing}[3.0, 4.0, 5.0]) - @test keepat!(df, v) === df - @test df == DataFrame(a=[2, 3], b=[4.0, 5.0]) - - df = DataFrame(a=[1, 2, 3], b=[3.0, 4.0, 5.0]) - @test keepat!(df, v) === df - @test df == DataFrame(a=[2, 3], b=[4.0, 5.0]) - end - - df = DataFrame() - @test_throws BoundsError keepat!(df, 10) - @test_throws BoundsError keepat!(df, [10]) - @test_throws BoundsError keepat!(df, 0) - @test_throws BoundsError keepat!(df, [0]) - - df = DataFrame(a=[]) - @test_throws BoundsError keepat!(df, 10) - @test_throws BoundsError keepat!(df, 0) - # the exception type changed between Julia 1.0.2 and Julia 1.1 - # so we use their supertype below - @test_throws Exception keepat!(df, [10]) - @test_throws Exception keepat!(df, [0]) - - df = DataFrame(a=[1, 2, 3], b=[3, 2, 1]) - @test keepat!(df, [3, 2]) == DataFrame(a=[2, 3], b=[2, 1]) - df = DataFrame(a=[1, 2, 3], b=[3, 2, 1]) - @test keepat!(df, [2, 2]) == DataFrame(a=2, b=2) - df = DataFrame(a=[1, 2, 3], b=[3, 2, 1]) - @test keepat!(df, [true, false, true]) === df - @test df == DataFrame(a=[1, 3], b=[3, 1]) - - for v in (1, [1], 1:1, [true, false, false]) - x = [1, 2, 3] - df = DataFrame(x=x) - @test keepat!(df, v) == DataFrame(x=[1]) - @test x == [1, 2, 3] - end - - for v in (1, [1], 1:1, [true, false, false], Not(2, 3), Not([false, true, true])) - x = [1, 2, 3] - df = DataFrame(x=x, copycols=false) - @test keepat!(df, v) == DataFrame(x=[1]) - @test x == [1] - end - - for inds in (1, [1], [true, false]) - df = DataFrame(x1=[1, 2]) - df.x2 = df.x1 - @test keepat!(df, inds) === df - @test df == DataFrame(x1=[1], x2=[1]) - end - - df = DataFrame(a=1, b=2) - push!(df.b, 3) - @test_throws AssertionError keepat!(df, 1) -end - @testset "describe" begin # Construct the test dataframe df = DataFrame(number = [1, 2, 3, 4], diff --git a/test/subdataframe.jl b/test/subdataframe.jl index 5ffa919e60..24c26c64c3 100644 --- a/test/subdataframe.jl +++ b/test/subdataframe.jl @@ -222,11 +222,10 @@ end @test names(df) == names(x)[[4, 2]] end -@testset "deleteat! and keepat!" begin +@testset "deleteat!" begin y = 1.0:10.0 df = view(DataFrame(y=y), 2:6, :) @test_throws ArgumentError deleteat!(df, 1) - @test_throws ArgumentError keepat!(df, 1) end @testset "parent" begin From e651b8826f1e69db645689ee061fb8b73d37899a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 14 Nov 2021 18:29:07 +0100 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/dataframe/dataframe.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 6e48be423e..0e251cbf49 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1004,8 +1004,8 @@ end Delete rows specified by `inds` from a `DataFrame` `df` in place and return it. Internally `deleteat!` is called for all columns so `inds` must be: -a vector of sorted and unique integers, a boolean vector, an integer. -Additionally `Not` any valid selector is accepted. +a vector of sorted and unique integers, a boolean vector, an integer, +or `Not` wrapping any valid selector. # Examples ```jldoctest From 8e2237749aec854e61c8201283c0dfb07c2929cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 14 Nov 2021 18:37:33 +0100 Subject: [PATCH 4/4] fix tests --- test/dataframe.jl | 9 ++++++--- test/deprecated.jl | 10 ++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/test/dataframe.jl b/test/dataframe.jl index 566faa5218..fd15d83871 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -616,9 +616,12 @@ end df = DataFrame(a=[]) @test_throws BoundsError deleteat!(df, 10) - # the exception type changed between Julia 1.0.2 and Julia 1.1 - # so we use their supertype below - @test_throws Exception deleteat!(df, [10]) + + if VERSION >= v"1.1" + @test_throws BoundsError deleteat!(df, [10]) + else + @test_throws InexactError deleteat!(df, [10]) + end df = DataFrame(a=[1, 2, 3], b=[3, 2, 1]) @test_throws ArgumentError deleteat!(df, [3, 2]) diff --git a/test/deprecated.jl b/test/deprecated.jl index d7b41d35ee..b7338514c6 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -130,4 +130,14 @@ end @test df3.x1 === x end +@testset "delete!" begin + df = DataFrame(a=1:4, b=1, c=2) + @test delete!(copy(df), 1) == deleteat!(copy(df), 1) + @test delete!(copy(df), [1, 3]) == deleteat!(copy(df), [1, 3]) + @test delete!(copy(df), [true, false, false, true]) == deleteat!(copy(df), [true, false, false, true]) + @test delete!(copy(df), Not(1)) == deleteat!(copy(df), Not(1)) + delete!(df, 2) + @test df == DataFrame(a=[1, 3, 4], b=1, c=2) +end + end # module