Skip to content

Commit

Permalink
Allow rename when selecting
Browse files Browse the repository at this point in the history
Signed-off-by: lizz <lizz@sensetime.com>
  • Loading branch information
innerlee committed Oct 6, 2019
1 parent a740b42 commit 93219f4
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 1 deletion.
59 changes: 58 additions & 1 deletion src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ end

# df[MultiRowIndex, MultiColumnIndex] => DataFrame
@inline function Base.getindex(df::DataFrame, row_inds::AbstractVector{T},
col_inds::Union{AbstractVector, Regex, Not, Between, All}) where T
col_inds::Union{Regex, Not, Between, All}) where T
@boundscheck if !checkindex(Bool, axes(df, 1), row_inds)
throw(BoundsError("attempt to access a data frame with $(nrow(df)) " *
"rows at index $row_inds"))
Expand All @@ -382,6 +382,28 @@ end
return DataFrame(new_columns, Index(_names(df)[selected_columns]), copycols=false)
end


# df[MultiRowIndex, MultiColumnIndex] => DataFrame
#
# `col_inds` may mix ordinary column selectors with `old => new` entries of type
# `Pair{Symbol, Symbol}`. Such an entry selects column `old`, and the matching
# column of the returned data frame is named `new`.
# Throws `BoundsError` when `row_inds` is not a valid row index for `df`.
@inline function Base.getindex(df::DataFrame, row_inds::AbstractVector{T},
                               col_inds::AbstractVector) where T
    @boundscheck if !checkindex(Bool, axes(df, 1), row_inds)
        throw(BoundsError("attempt to access a data frame with $(nrow(df)) " *
                          "rows at index $row_inds"))
    end
    isrename = c -> c isa Pair{Symbol, Symbol}
    if any(isrename, col_inds)
        # Only genuine `Symbol => Symbol` pairs are unwrapped to their source
        # name. Any other entry is passed to the index lookup unchanged, so a
        # malformed selector is rejected there instead of being silently
        # truncated to its first component.
        cc = [isrename(c) ? first(c) : c for c in col_inds]
        rr = filter(isrename, col_inds)
    else
        cc = col_inds
        rr = Pair{Symbol, Symbol}[]
    end
    selected_columns = index(df)[cc]
    # Computing integer indices once for all columns is faster
    selected_rows = T === Bool ? findall(row_inds) : row_inds
    new_columns = AbstractVector[dv[selected_rows]
                                 for dv in _columns(df)[selected_columns]]
    res = DataFrame(new_columns, Index(_names(df)[selected_columns]), copycols=false)
    # Plain selection (no pairs) needs no renaming at all, so skip the call
    # rather than invoking `rename!` with an empty argument list.
    return isempty(rr) ? res : rename!(res, rr...)
end

@inline function Base.getindex(df::DataFrame, row_inds::AbstractVector{T}, ::Colon) where T
@boundscheck if !checkindex(Bool, axes(df, 1), row_inds)
throw(BoundsError("attempt to access a data frame with $(nrow(df)) " *
Expand Down Expand Up @@ -831,6 +853,16 @@ function select!(df::DataFrame, inds::AbstractVector{Int})
end

# A single integer column selector is handled as a one-element vector.
function select!(df::DataFrame, c::Int)
    return select!(df, [c])
end

# Keep only the columns listed in `c`, mutating `df`.
#
# Entries of `c` of type `Pair{Symbol, Symbol}` (`old => new`) select column
# `old` and rename it to `new` after the selection. When `c` holds no such
# pair it is forwarded to the index lookup as is.
function select!(df::DataFrame, c::AbstractVector{T}) where T
    isrename = x -> x isa Pair{Symbol, Symbol}
    if any(isrename, c)
        # Unwrap genuine rename pairs only. Other entries are left untouched so
        # that an invalid selector is reported by the index lookup rather than
        # being silently cut down to its first component.
        cc = [isrename(x) ? first(x) : x for x in c]
        rr = filter(isrename, c)
        return rename!(select!(df, index(df)[cc]), rr...)
    end
    return select!(df, index(df)[c])
end

# Fallback: resolve any other selector through the index of `df` first.
function select!(df::DataFrame, c::Any)
    return select!(df, index(df)[c])
end

# Several selectors are combined with `All` and handled as one selector.
function select!(df::DataFrame, c, cs...)
    return select!(df, All(c, cs...))
end

Expand Down Expand Up @@ -859,6 +891,9 @@ If `copycols=false`, then returned `DataFrame` shares column vectors with `df`.
If `df` is a `SubDataFrame` then a `SubDataFrame` is returned if `copycols=false`
and a `DataFrame` with freshly allocated columns otherwise.
If `df` is a `DataFrame`, a vector of column selectors may also contain `old => new`
pairs to rename some or all of the selected columns (renaming is not yet supported
for `SubDataFrame`).
### Examples
```jldoctest
Expand All @@ -879,6 +914,15 @@ julia> select(d, :b)
│ 1 │ 4 │
│ 2 │ 5 │
│ 3 │ 6 │
julia> select(d, [:b=>:x])
3×1 DataFrame
│ Row │ x │
│ │ Int64 │
├─────┼───────┤
│ 1 │ 4 │
│ 2 │ 5 │
│ 3 │ 6 │
```
"""
Expand All @@ -888,8 +932,21 @@ select(df::DataFrame, inds::AbstractVector{Int}; copycols::Bool=true) =

# A single integer column selector is handled as a one-element vector.
function select(df::DataFrame, c::Int; copycols::Bool=true)
    return select(df, [c], copycols=copycols)
end

# Vectors whose element type is restricted to `Symbol`/`Integer` cannot hold
# rename pairs, so they are resolved through the index of `df` directly.
function select(df::DataFrame, c::AbstractVector{T};
                copycols::Bool=true) where T<:Union{Symbol,Integer}
    return select(df, index(df)[c], copycols=copycols)
end

# Return a data frame holding only the columns listed in `c`.
#
# Entries of `c` of type `Pair{Symbol, Symbol}` (`old => new`) select column
# `old` and rename it to `new` in the result. `copycols` is forwarded unchanged
# to the underlying selection. When `c` holds no such pair it is forwarded to
# the index lookup as is.
function select(df::DataFrame, c::AbstractVector{T}; copycols::Bool=true) where T
    isrename = x -> x isa Pair{Symbol, Symbol}
    if any(isrename, c)
        # Unwrap genuine rename pairs only. Other entries are left untouched so
        # that an invalid selector is reported by the index lookup rather than
        # being silently cut down to its first component.
        cc = [isrename(x) ? first(x) : x for x in c]
        rr = filter(isrename, c)
        # `rename!` acts on the freshly created result, never on `df` itself.
        return rename!(select(df, index(df)[cc], copycols=copycols), rr...)
    end
    return select(df, index(df)[c], copycols=copycols)
end

# Fallback: resolve any other selector through the index of `df` first.
function select(df::DataFrame, c::Any; copycols::Bool=true)
    return select(df, index(df)[c], copycols=copycols)
end

# Several selectors are combined with `All` and handled as one selector.
function select(df::DataFrame, c, cs...; copycols::Bool=true)
    return select(df, All(c, cs...), copycols=copycols)
end

Expand Down
71 changes: 71 additions & 0 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,36 @@ end
@test d.b === df.b
end

@testset "select! rename" begin
    src = DataFrame(a=1, b=2, c=3, d=4, e=5)
    # Keep handles on the original column vectors to check identity below
    col_a, col_b, col_c, col_d, col_e = src.a, src.b, src.c, src.d, src.e

    # A single renamed column
    out = copy(src, copycols=false)
    select!(out, [:a => :b])
    @test names(out) == [:b]
    @test out.b === col_a

    # Swapping two names while keeping a third column untouched
    out = copy(src, copycols=false)
    select!(out, [:b => :a, :a => :b, :e])
    @test names(out) == [:a, :b, :e]
    @test out.b === col_a
    @test out.a === col_b
    @test out.e === col_e

    # Renaming every column
    out = copy(src, copycols=false)
    select!(out, [:a => :aa, :b => :bb, :c => :cc, :d => :dd, :e => :ee])
    @test names(out) == [:aa, :bb, :cc, :dd, :ee]
    @test out.aa === col_a
    @test out.bb === col_b
    @test out.cc === col_c
    @test out.dd === col_d
    @test out.ee === col_e
end

@testset "select" begin
df = DataFrame(a=1, b=2, c=3, d=4, e=5)
@test_throws BoundsError select(df, 0)
Expand Down Expand Up @@ -819,6 +849,36 @@ end
@test d.b === df.b
end

@testset "select rename" begin
    src = DataFrame(a=1, b=2, c=3, d=4, e=5)

    # Default `copycols`: values are equal, but the vectors are not shared
    out = select(src, [:a => :b])
    @test names(out) == [:b]
    @test out.b !== src.a
    @test out.b == src.a

    # Swapping two names while keeping a third column untouched
    out = select(src, [:b => :a, :a => :b, :e])
    @test names(out) == [:a, :b, :e]
    @test out.b !== src.a
    @test out.a !== src.b
    @test out.e !== src.e
    @test out.b == src.a
    @test out.a == src.b
    @test out.e == src.e

    # `copycols=false`: the selected columns are the very same vectors
    out = select(src, [:e, :b => :a, :c], copycols=false)
    @test names(out) == [:e, :a, :c]
    @test out.e === src.e
    @test out.a === src.b
    @test out.c === src.c

    out = select(src, [:e => :a, :b, :a => :c], copycols=false)
    @test names(out) == [:a, :b, :c]
    @test out.a === src.e
    @test out.b === src.b
    @test out.c === src.a
end

@testset "deleterows!" begin
df = DataFrame(a=[1, 2], b=[3.0, 4.0])
@test deleterows!(df, 1) === df
Expand Down Expand Up @@ -1554,6 +1614,17 @@ end
@test df[:, [:y,:x]][!, :x] !== x
end

@testset "test getindex with rename" begin
    xs = [1,3]
    ys = [2,4]
    tbl = DataFrame(x=xs, y=ys, copycols=false)
    # Column selector that picks `:x` and renames it to `:t`
    sel = [:x => :t]
    # `!` shares the column vector, `:` returns an equal but distinct vector
    @test tbl[!, sel].t === xs
    @test tbl[:, sel].t == xs
    @test tbl[:, sel].t !== xs
    # Row subsets combined with a renaming column selector
    @test tbl[1:1, sel].t == xs[1:1]
    @test tbl[Not(2), sel].t == xs[Not(2)]
end

@testset "test corner case of getindex" begin
df = DataFrame(x=[1], y=[1])
@test_throws ArgumentError df[true, 1:2]
Expand Down

0 comments on commit 93219f4

Please sign in to comment.