diff --git a/NEWS.md b/NEWS.md index 997a18213e..d1ca8e2b7f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,6 +33,8 @@ selected as a property (e.g. `df.col .= 1`) is allowed when column does not exist and it allocates a fresh column ([#2655](https://github.com/JuliaData/DataFrames.jl/pull/2655)) +* `delete!` now correctly handles the case when columns of a data frame are aliased + ([#2690](https://github.com/JuliaData/DataFrames.jl/pull/2690)) ## Deprecated diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 3599c8369e..d3979988d9 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -966,10 +966,10 @@ function Base.delete!(df::DataFrame, inds) if !isempty(inds) && size(df, 2) == 0 throw(BoundsError(df, (inds, :))) end + # we require ind to be stored and unique like in Base # otherwise an error will be thrown and the data frame will get corrupted - foreach(col -> deleteat!(col, inds), _columns(df)) - return df + return _delete!_helper(df, inds) end function Base.delete!(df::DataFrame, inds::AbstractVector{Bool}) @@ -977,12 +977,35 @@ function Base.delete!(df::DataFrame, inds::AbstractVector{Bool}) throw(BoundsError(df, (inds, :))) end drop = findall(inds) - foreach(col -> deleteat!(col, drop), _columns(df)) - return df + return _delete!_helper(df, drop) end Base.delete!(df::DataFrame, inds::Not) = delete!(df, axes(df, 1)[inds]) +function _delete!_helper(df::DataFrame, drop) + cols = _columns(df) + isempty(cols) && return df + + n = nrow(df) + col1 = cols[1] + deleteat!(col1, drop) + newn = length(col1) + + for i in 2:length(cols) + col = cols[i] + if length(col) == n + deleteat!(col, drop) + end + end + + for i in 1:length(cols) + # this should never happen, but we add it for safety + @assert length(cols[i]) == newn corrupt_msg(df, i) + end + + return df +end + """ empty!(df::DataFrame) diff --git a/test/dataframe.jl b/test/dataframe.jl index 413b6866e6..68dffb7b6e 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -613,6 +613,17 @@ end @test delete!(df, v) == DataFrame(x=[2, 3]) @test x == [2, 3] end + + for inds in (1, [1], [true, false]) + df = DataFrame(x1=[1, 2]) + df.x2 = df.x1 + @test delete!(df, inds) === df + @test df == DataFrame(x1=[2], x2=[2]) + end + + df = DataFrame(a=1, b=2) + push!(df.b, 3) + @test_throws AssertionError delete!(df, 1) end @testset "describe" begin