Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deprecate delete!, define deleteat! #2854

Merged
merged 6 commits into from
Nov 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,16 @@
filled with `missing` values.
If `SubDataFrame` was not created with `:` as column selector the resulting operation
must produce the same column names as stored in the source `SubDataFrame` or an error is thrown.

* `Tables.materializer` when passed the following types or their subtypes:
`AbstractDataFrame`, `DataFrameRows`, `DataFrameColumns` returns `DataFrame`.
([#2839](https://github.com/JuliaData/DataFrames.jl/pull/2839))
* the `insertcols!` function receives new keyword argument `after`
(with `false` default) that specifies if columns should be inserted after
or before `col`.
([#2829](https://github.com/JuliaData/DataFrames.jl/pull/2829))
* Added support for `deleteat!`, which deletes rows from a `DataFrame` in place
([#2854](https://github.com/JuliaData/DataFrames.jl/pull/2854))
* `leftjoin!` performing a left join of two data frame objects by updating the
left data frame with the joined columns from right data frame.
([#2843](https://github.com/JuliaData/DataFrames.jl/pull/2843))
Expand Down Expand Up @@ -105,6 +108,11 @@
([#2869](https://github.com/JuliaData/DataFrames.jl/pull/2869)),
([#2919](https://github.com/JuliaData/DataFrames.jl/pull/2919))

## Deprecations

* `delete!` is deprecated in favor of `deleteat!`
([#2854](https://github.com/JuliaData/DataFrames.jl/pull/2854))

## Planned changes

* In DataFrames.jl 1.4 release on Julia 1.7 or newer broadcasting assignment
Expand Down
2 changes: 1 addition & 1 deletion docs/src/lib/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ valuecols

## Filtering rows
```@docs
delete!
deleteat!
empty
empty!
filter
Expand Down
2 changes: 1 addition & 1 deletion docs/src/man/split_apply_combine.md
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ julia> push!(df, [3])
3 │ 3

julia> gd[1]
ERROR: AssertionError: The current number of rows in the parent data frame is 3 and it does not match the number of rows it contained when GroupedDataFrame was created which was 2. The number of rows in the parent data frame has likely been changed unintentionally (e.g. using subset!, filter!, delete!, push!, or append! functions).
ERROR: AssertionError: The current number of rows in the parent data frame is 3 and it does not match the number of rows it contained when GroupedDataFrame was created which was 2. The number of rows in the parent data frame has likely been changed unintentionally (e.g. using subset!, filter!, deleteat!, push!, or append! functions).
```

Sometimes it is useful to append rows to the source data frame of a
Expand Down
16 changes: 8 additions & 8 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ function dropmissing!(df::AbstractDataFrame,
disallowmissing::Bool=true)
inds = completecases(df, cols)
inds .= .!(inds)
delete!(df, inds)
deleteat!(df, inds)
disallowmissing && disallowmissing!(df, cols)
df
end
Expand Down Expand Up @@ -1182,7 +1182,7 @@ julia> filter!(AsTable(:) => nt -> nt.x == 1 || nt.y == "b", df)
3 │ 1 b
```
"""
Base.filter!(f::Function, df::AbstractDataFrame) = delete!(df, findall(!f, eachrow(df)))
Base.filter!(f::Function, df::AbstractDataFrame) = deleteat!(df, findall(!f, eachrow(df)))
# Single-column form of `filter!`: for `col => f` the column `df[!, col]`
# is handed, together with `f`, to `_filter!_helper`, whose result is returned.
function Base.filter!((col, f)::Pair{<:ColumnIndex}, df::AbstractDataFrame)
    column = df[!, col]
    return _filter!_helper(df, f, column)
end
Base.filter!((cols, f)::Pair{<:AbstractVector{Symbol}}, df::AbstractDataFrame) =
Expand All @@ -1200,20 +1200,20 @@ function _filter!_helper(df::AbstractDataFrame, f, cols...)
else
rowidxs = findall(((x...) -> !(f(x...)::Bool)).(cols...))
end
return delete!(df, rowidxs)
return deleteat!(df, rowidxs)
end

function Base.filter!((cols, f)::Pair{<:AsTable}, df::AbstractDataFrame)
dff = select(df, cols.cols, copycols=false)
if ncol(dff) == 0
return delete!(df, findall(x -> !f(NamedTuple()), axes(df, 1)))
return deleteat!(df, findall(x -> !f(NamedTuple()), axes(df, 1)))
else
return _filter!_helper_astable(df, Tables.namedtupleiterator(dff), f)
end
end

_filter!_helper_astable(df::AbstractDataFrame, nti::Tables.NamedTupleIterator, f) =
delete!(df, _findall((x -> !(f(x)::Bool)).(nti)))
deleteat!(df, _findall((x -> !(f(x)::Bool)).(nti)))

function Base.Matrix(df::AbstractDataFrame)
T = reduce(promote_type, (eltype(v) for v in eachcol(df)), init=Union{})
Expand Down Expand Up @@ -1325,11 +1325,11 @@ end

# Column-restricted variant: narrow `df` to `cols` (passing `copycols=false`
# to `select`) and delegate to the single-argument `nonunique`.
function nonunique(df::AbstractDataFrame, cols)
    selected = select(df, cols, copycols=false)
    return nonunique(selected)
end

Base.unique!(df::AbstractDataFrame) = delete!(df, _findall(nonunique(df)))
Base.unique!(df::AbstractDataFrame) = deleteat!(df, _findall(nonunique(df)))
Base.unique!(df::AbstractDataFrame, cols::AbstractVector) =
delete!(df, _findall(nonunique(df, cols)))
deleteat!(df, _findall(nonunique(df, cols)))
Base.unique!(df::AbstractDataFrame, cols) =
delete!(df, _findall(nonunique(df, cols)))
deleteat!(df, _findall(nonunique(df, cols)))

# Unique rows of an AbstractDataFrame.
@inline function Base.unique(df::AbstractDataFrame; view::Bool=false)
Expand Down
4 changes: 2 additions & 2 deletions src/abstractdataframe/subset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ julia> df
"""
function subset!(df::AbstractDataFrame, @nospecialize(args...); skipmissing::Bool=false)
row_selector = _get_subset_conditions(df, Ref{Any}(args), skipmissing)
return delete!(df, findall(!, row_selector))
return deleteat!(df, findall(!, row_selector))
end

function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Bool=false,
Expand All @@ -329,7 +329,7 @@ function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Boo
lazy_lock = gdf.lazy_lock
row_selector = _get_subset_conditions(gdf, Ref{Any}(args), skipmissing)
df = parent(gdf)
res = delete!(df, findall(!, row_selector))
res = deleteat!(df, findall(!, row_selector))
if nrow(res) == length(groups) # we have not removed any rows
return ungroup ? res : gdf
end
Expand Down
20 changes: 10 additions & 10 deletions src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -999,12 +999,13 @@ function Base.copy(df::DataFrame; copycols::Bool=true)
end

"""
delete!(df::DataFrame, inds)
deleteat!(df::DataFrame, inds)

Delete rows specified by `inds` from a `DataFrame` `df` in place and return it.

Internally `deleteat!` is called for all columns so `inds` must be:
a vector of sorted and unique integers, a boolean vector, an integer, or `Not`.
a vector of sorted and unique integers, a boolean vector, an integer,
or `Not` wrapping any valid selector.

# Examples
```jldoctest
Expand All @@ -1017,17 +1018,16 @@ julia> df = DataFrame(a=1:3, b=4:6)
2 │ 2 5
3 │ 3 6

julia> delete!(df, 2)
julia> deleteat!(df, 2)
2×2 DataFrame
Row │ a b
│ Int64 Int64
─────┼──────────────
1 │ 1 4
2 │ 3 6
```

"""
function Base.delete!(df::DataFrame, inds)
function Base.deleteat!(df::DataFrame, inds)
if !isempty(inds) && size(df, 2) == 0
throw(BoundsError(df, (inds, :)))
end
Expand All @@ -1039,10 +1039,10 @@ function Base.delete!(df::DataFrame, inds)

# we require ind to be sorted and unique like in Base
# otherwise an error will be thrown and the data frame will get corrupted
return _delete!_helper(df, inds)
return _deleteat!_helper(df, inds)
end

function Base.delete!(df::DataFrame, inds::AbstractVector{Bool})
function Base.deleteat!(df::DataFrame, inds::AbstractVector{Bool})
if length(inds) != size(df, 1)
throw(BoundsError(df, (inds, :)))
end
Expand All @@ -1051,12 +1051,12 @@ function Base.delete!(df::DataFrame, inds::AbstractVector{Bool})
if VERSION <= v"1.6.2" && drop isa UnitRange{<:Integer}
drop = collect(drop)
end
return _delete!_helper(df, drop)
return _deleteat!_helper(df, drop)
end

Base.delete!(df::DataFrame, inds::Not) = delete!(df, axes(df, 1)[inds])
Base.deleteat!(df::DataFrame, inds::Not) = deleteat!(df, axes(df, 1)[inds])

function _delete!_helper(df::DataFrame, drop)
function _deleteat!_helper(df::DataFrame, drop)
cols = _columns(df)
isempty(cols) && return df

Expand Down
6 changes: 5 additions & 1 deletion src/deprecated.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
export by, aggregate

# TODO: remove definitions in 2.0 release
# Stub for the removed `by` function: whatever positional or keyword
# arguments are passed, it throws an `ArgumentError` naming the replacement.
function by(args...; kwargs...)
    msg = "by function was removed from DataFrames.jl. " *
          "Use the `combine(groupby(...), ...)` or `combine(f, groupby(...))` instead."
    throw(ArgumentError(msg))
end

# Stub for the removed `aggregate` function: whatever positional or keyword
# arguments are passed, it throws an `ArgumentError` naming the replacement.
function aggregate(args...; kwargs...)
    msg = "aggregate function was removed from DataFrames.jl. " *
          "Use the `combine` function instead."
    throw(ArgumentError(msg))
end

# TODO: remove deprecation in 2.0 release
import Base.delete!
@deprecate delete!(df::DataFrame, inds) deleteat!(df::DataFrame, inds)
bkamins marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 1 addition & 1 deletion src/groupeddataframe/groupeddataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ corrupt_msg(gd::GroupedDataFrame) =
"rows it contained when GroupedDataFrame was created which was " *
"$(length(getfield(gd, :groups))). The number of rows in the parent " *
"data frame has likely been changed unintentionally " *
"(e.g. using subset!, filter!, delete!, push!, or append! functions)."
"(e.g. using subset!, filter!, deleteat!, push!, or append! functions)."

function Base.getproperty(gd::GroupedDataFrame, f::Symbol)
@assert length(getfield(gd, :groups)) == nrow(getfield(gd, :parent)) corrupt_msg(gd)
Expand Down
4 changes: 2 additions & 2 deletions src/other/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(do_call),typeof(maximum),Vector{Int},Vector{Int},Vector{Int},GroupedDataFrame{DataFrame},Tuple{Vector{Matrix{Float64}}},Int})
Base.precompile(Tuple{typeof(getindex),GroupedDataFrame{DataFrame},Vector{Tuple{Any, Int}}})
Base.precompile(Tuple{typeof(show),IOContext{IOBuffer},DataFrameRow{DataFrame, Index}})
Base.precompile(Tuple{typeof(delete!),DataFrame,InvertedIndex{Vector{Bool}}})
Base.precompile(Tuple{typeof(deleteat!),DataFrame,InvertedIndex{Vector{Bool}}})
Base.precompile(Tuple{Core.kwftype(typeof(leftjoin)),NamedTuple{(:on, :makeunique, :validate, :renamecols), Tuple{Vector{Any}, Bool, Pair{Bool, Bool}, Pair{Symbol, String}}},typeof(leftjoin),DataFrame,DataFrame})
Base.precompile(Tuple{typeof(groupreduce!),Vector{Union{Missing, ComplexF64}},Function,Function,Nothing,Nothing,Bool,Vector{Union{Missing, ComplexF64}},GroupedDataFrame{DataFrame}})
Base.precompile(Tuple{Core.kwftype(typeof(manipulate)),NamedTuple{(:copycols, :keeprows, :renamecols), Tuple{Bool, Bool, Bool}},typeof(manipulate),SubDataFrame{DataFrame, SubIndex{Index, Vector{Int}, Vector{Int}}, Vector{Int}},Base.OneTo{Int}})
Expand Down Expand Up @@ -1446,7 +1446,7 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(row_group_slots),Tuple{Vector{Union{Missing, Int}}, Vector{Int}},Tuple{IntegerRefpool{Union{Missing, Int}}, IntegerRefpool{Int}},Tuple{IntegerRefarray{Vector{Union{Missing, Int}}}, IntegerRefarray{Vector{Int}}},Val{false},Vector{Int},Bool,Bool})
Base.precompile(Tuple{Reduce{typeof(Base.add_sum), Nothing, Nothing},Vector{Union{Missing, Int, Int8}},GroupedDataFrame{DataFrame}})
Base.precompile(Tuple{Core.kwftype(typeof(innerjoin)),NamedTuple{(:on,), Tuple{Vector{Pair{Symbol, Symbol}}}},typeof(innerjoin),DataFrame,DataFrame})
Base.precompile(Tuple{typeof(delete!),DataFrame,InvertedIndex{InvertedIndices.TupleVector{Tuple{Int, Int}}}})
Base.precompile(Tuple{typeof(deleteat!),DataFrame,InvertedIndex{InvertedIndices.TupleVector{Tuple{Int, Int}}}})
Base.precompile(Tuple{typeof(groupreduce!),Vector{Union{Missing, Int}},Function,Function,Nothing,Nothing,Bool,Vector{Union{Missing, Bool}},GroupedDataFrame{DataFrame}})
Base.precompile(Tuple{typeof(_semijoin_sorted),Vector{String},Vector{Union{Missing, String}},BitVector})
Base.precompile(Tuple{Core.kwftype(typeof(hcat)),NamedTuple{(:makeunique,), Tuple{Bool}},typeof(hcat),SubDataFrame{DataFrame, Index, Vector{Int}},SubDataFrame{DataFrame, Index, Vector{Int}}})
Expand Down
2 changes: 1 addition & 1 deletion src/subdataframe/subdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ Base.setproperty!(::SubDataFrame, col_ind::AbstractString, v::Any) =

# Copy of a `SubDataFrame`: index the parent with the rows of `sdf` and with
# the parent column positions obtained from the index of `sdf`.
function Base.copy(sdf::SubDataFrame)
    src = parent(sdf)
    rowsel = rows(sdf)
    colsel = parentcols(index(sdf), :)
    return src[rowsel, colsel]
end

Base.delete!(df::SubDataFrame, ind) =
Base.deleteat!(df::SubDataFrame, ind) =
throw(ArgumentError("SubDataFrame does not support deleting rows"))

function DataFrame(sdf::SubDataFrame; copycols::Bool=true)
Expand Down
6 changes: 3 additions & 3 deletions test/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,14 @@ end
@test eltype(dropmissing!(df).b) == Int
end

@testset "delete! https://github.com/JuliaLang/julia/pull/41646 bug workaround" begin
@testset "deleteat! https://github.com/JuliaLang/julia/pull/41646 bug workaround" begin
# these tests will crash Julia if they are not correct
df = DataFrame(a= Vector{Union{Bool,Missing}}(missing, 10^4));
delete!(df, 2:(nrow(df) - 5))
deleteat!(df, 2:(nrow(df) - 5))
@test nrow(df) == 6

df = DataFrame(a= Vector{Union{Bool,Missing}}(missing, 10^4));
delete!(df, [false; trues(nrow(df) - 6); falses(5)])
deleteat!(df, [false; trues(nrow(df) - 6); falses(5)])
@test nrow(df) == 6
end

Expand Down
45 changes: 24 additions & 21 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -582,74 +582,77 @@ end
@test_throws ArgumentError push!(df, "ab")
end

@testset "delete!" begin
@testset "deleteat!" begin
df = DataFrame(a=[1, 2], b=[3.0, 4.0])
@test_throws BoundsError delete!(df, [true, true, true])
@test delete!(df, 1) === df
@test_throws BoundsError deleteat!(df, [true, true, true])
@test deleteat!(df, 1) === df
@test df == DataFrame(a=[2], b=[4.0])

df = DataFrame(a=[1, 2], b=[3.0, 4.0])
@test delete!(df, 2) === df
@test deleteat!(df, 2) === df
@test df == DataFrame(a=[1], b=[3.0])

df = DataFrame(a=Union{Int, Missing}[1, 2], b=Union{Float64, Missing}[3.0, 4.0])
@test delete!(df, 1) === df
@test deleteat!(df, 1) === df
@test df == DataFrame(a=[2], b=[4.0])

df = DataFrame(a=Union{Int, Missing}[1, 2], b=Union{Float64, Missing}[3.0, 4.0])
@test delete!(df, 2) === df
@test deleteat!(df, 2) === df
@test df == DataFrame(a=[1], b=[3.0])

for v in (2:3, [2, 3])
df = DataFrame(a=Union{Int, Missing}[1, 2, 3], b=Union{Float64, Missing}[3.0, 4.0, 5.0])
@test delete!(df, v) === df
@test deleteat!(df, v) === df
@test df == DataFrame(a=[1], b=[3.0])

df = DataFrame(a=[1, 2, 3], b=[3.0, 4.0, 5.0])
@test delete!(df, v) === df
@test deleteat!(df, v) === df
@test df == DataFrame(a=[1], b=[3.0])
end

df = DataFrame()
@test_throws BoundsError delete!(df, 10)
@test_throws BoundsError delete!(df, [10])
@test_throws BoundsError deleteat!(df, 10)
@test_throws BoundsError deleteat!(df, [10])

df = DataFrame(a=[])
@test_throws BoundsError delete!(df, 10)
# the exception type changed between Julia 1.0.2 and Julia 1.1
# so we use their supertype below
@test_throws Exception delete!(df, [10])
@test_throws BoundsError deleteat!(df, 10)

if VERSION >= v"1.1"
@test_throws BoundsError deleteat!(df, [10])
else
@test_throws InexactError deleteat!(df, [10])
end

df = DataFrame(a=[1, 2, 3], b=[3, 2, 1])
@test_throws ArgumentError delete!(df, [3, 2])
@test_throws ArgumentError delete!(df, [2, 2])
@test delete!(df, [false, true, false]) === df
@test_throws ArgumentError deleteat!(df, [3, 2])
@test_throws ArgumentError deleteat!(df, [2, 2])
@test deleteat!(df, [false, true, false]) === df
@test df == DataFrame(a=[1, 3], b=[3, 1])

for v in (1, [1], 1:1, [true, false, false])
x = [1, 2, 3]
df = DataFrame(x=x)
@test delete!(df, v) == DataFrame(x=[2, 3])
@test deleteat!(df, v) == DataFrame(x=[2, 3])
@test x == [1, 2, 3]
end

for v in (1, [1], 1:1, [true, false, false], Not(2, 3), Not([false, true, true]))
x = [1, 2, 3]
df = DataFrame(x=x, copycols=false)
@test delete!(df, v) == DataFrame(x=[2, 3])
@test deleteat!(df, v) == DataFrame(x=[2, 3])
@test x == [2, 3]
end

for inds in (1, [1], [true, false])
df = DataFrame(x1=[1, 2])
df.x2 = df.x1
@test delete!(df, inds) === df
@test deleteat!(df, inds) === df
@test df == DataFrame(x1=[2], x2=[2])
end

df = DataFrame(a=1, b=2)
push!(df.b, 3)
@test_throws AssertionError delete!(df, 1)
@test_throws AssertionError deleteat!(df, 1)
end

@testset "describe" begin
Expand Down
10 changes: 10 additions & 0 deletions test/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,14 @@ end
@test df3.x1 === x
end

# Checks that the deprecated `delete!` gives the same result as `deleteat!`
# for several kinds of row selectors.
@testset "delete!" begin
df = DataFrame(a=1:4, b=1, c=2)
# every comparison works on fresh `copy(df)` objects, so `df` itself is
# not touched by the calls below
# single integer index
@test delete!(copy(df), 1) == deleteat!(copy(df), 1)
# vector of integer indices
@test delete!(copy(df), [1, 3]) == deleteat!(copy(df), [1, 3])
# boolean vector
@test delete!(copy(df), [true, false, false, true]) == deleteat!(copy(df), [true, false, false, true])
# `Not` selector
@test delete!(copy(df), Not(1)) == deleteat!(copy(df), Not(1))
# finally `delete!` is applied to `df` itself and its contents are checked
delete!(df, 2)
@test df == DataFrame(a=[1, 3, 4], b=1, c=2)
end

end # module
4 changes: 2 additions & 2 deletions test/subdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,10 @@ end
@test names(df) == names(x)[[4, 2]]
end

@testset "delete!" begin
@testset "deleteat!" begin
y = 1.0:10.0
df = view(DataFrame(y=y), 2:6, :)
@test_throws ArgumentError delete!(df, 1)
@test_throws ArgumentError deleteat!(df, 1)
end

@testset "parent" begin
Expand Down