JuliaData · innerlee · Oct 5, 2019 · Oct 5, 2019 · Oct 5, 2019 · Oct 6, 2019
diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -150,21 +150,21 @@ rename(f::Function, df::AbstractDataFrame)
 
 * `::AbstractDataFrame` : the updated result
 
-New names are processed sequentially. A new name must not already exist in the `DataFrame`
-at the moment an attempt to rename a column is performed.
+Each name is changed at most once. Permutation of names is allowed.
 
 **Examples**
 
 ```julia
 df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
 rename(df, :i => :A, :x => :X)
+rename(df, :x => :y, :y => :x)
 rename(df, [:i => :A, :x => :X])
 rename(df, Dict(:i => :A, :x => :X))
 rename(x -> Symbol(uppercase(string(x))), df)
 rename(df) do x
     Symbol(uppercase(string(x)))
 end
-rename!(df, Dict(:i =>: A, :x => :X))
+rename!(df, Dict(:i => :A, :x => :X))
 ```
 
 """

diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl
@@ -370,7 +370,7 @@ end
 
 # df[MultiRowIndex, MultiColumnIndex] => DataFrame
 @inline function Base.getindex(df::DataFrame, row_inds::AbstractVector{T},
-                               col_inds::Union{AbstractVector, Regex, Not, Between, All}) where T
+                               col_inds::Union{Regex, Not, Between, All}) where T
     @boundscheck if !checkindex(Bool, axes(df, 1), row_inds)
         throw(BoundsError("attempt to access a data frame with $(nrow(df)) " *
                           "rows at index $row_inds"))
@@ -382,6 +382,28 @@ end
     return DataFrame(new_columns, Index(_names(df)[selected_columns]), copycols=false)
 end
 
+
+# df[rows, cols] => DataFrame, where `cols` may contain `:old => :new` rename pairs
+@inline function Base.getindex(df::DataFrame, row_inds::AbstractVector{T},
+                               col_inds::AbstractVector) where T
+    @boundscheck if !checkindex(Bool, axes(df, 1), row_inds)
+        throw(BoundsError("attempt to access a data frame with $(nrow(df)) " *
+                          "rows at index $row_inds"))
+    end
+    isrename(c) = c isa Pair{Symbol, Symbol}
+    # Split the request into the columns to pick and the renames to apply afterwards
+    hasrenames = any(isrename, col_inds)
+    picked = hasrenames ? [c isa Symbol ? c : c[1] for c in col_inds] : col_inds
+    renames = hasrenames ? filter(isrename, col_inds) : Pair{Symbol, Symbol}[]
+    colidx = index(df)[picked]
+    # A Bool mask is converted to integer positions once rather than once per column
+    rowidx = T === Bool ? findall(row_inds) : row_inds
+    columns = AbstractVector[col[rowidx] for col in _columns(df)[colidx]]
+    # `result` is built around a new `Index`, which `rename!` then updates in place
+    result = DataFrame(columns, Index(_names(df)[colidx]), copycols=false)
+    return rename!(result, renames...)
+end
+
 @inline function Base.getindex(df::DataFrame, row_inds::AbstractVector{T}, ::Colon) where T
     @boundscheck if !checkindex(Bool, axes(df, 1), row_inds)
         throw(BoundsError("attempt to access a data frame with $(nrow(df)) " *
@@ -831,6 +853,16 @@ function select!(df::DataFrame, inds::AbstractVector{Int})
 end
 
 select!(df::DataFrame, c::Int) = select!(df, [c])
+
+function select!(df::DataFrame, c::AbstractVector{T}) where T
+    isrename(col) = col isa Pair{Symbol, Symbol}
+    # Without `:old => :new` pairs this is an ordinary column selection
+    any(isrename, c) || return select!(df, index(df)[c])
+    sources = [col isa Symbol ? col : col[1] for col in c]
+    renames = filter(isrename, c)
+    return rename!(select!(df, index(df)[sources]), renames...)
+end
+
 select!(df::DataFrame, c::Any) = select!(df, index(df)[c])
 select!(df::DataFrame, c, cs...) = select!(df, All(c, cs...))
 
@@ -859,6 +891,9 @@ If `copycols=false`, then returned `DataFrame` shares column vectors with `df`.
 If `df` is a `SubDataFrame` then a `SubDataFrame` is returned if `copycols=false`
 and a `DataFrame` with freshly allocated columns otherwise.
 
+If `df` is a `DataFrame`, columns can be renamed while selecting by passing
+`:old => :new` pairs in a vector (`SubDataFrame` does not support renaming yet).
+
 ### Examples
 
 ```jldoctest
@@ -879,6 +914,15 @@ julia> select(d, :b)
 │ 1   │ 4     │
 │ 2   │ 5     │
 │ 3   │ 6     │
+
+julia> select(d, [:b=>:x])
+3×1 DataFrame
+│ Row │ x     │
+│     │ Int64 │
+├─────┼───────┤
+│ 1   │ 4     │
+│ 2   │ 5     │
+│ 3   │ 6     │
 ```
 
 """
@@ -888,8 +932,21 @@ select(df::DataFrame, inds::AbstractVector{Int}; copycols::Bool=true) =
 
 select(df::DataFrame, c::Int; copycols::Bool=true) =
     select(df, [c], copycols=copycols)
+select(df::DataFrame, c::AbstractVector{<:Union{Symbol,Integer}}; copycols::Bool=true) =
+    select(df, index(df)[c]; copycols=copycols)
+
+function select(df::DataFrame, c::AbstractVector{T}; copycols::Bool=true) where T
+    isrename(col) = col isa Pair{Symbol, Symbol}
+    # Without `:old => :new` pairs this is an ordinary column selection
+    any(isrename, c) || return select(df, index(df)[c], copycols=copycols)
+    sources = [col isa Symbol ? col : col[1] for col in c]
+    renames = filter(isrename, c)
+    return rename!(select(df, index(df)[sources], copycols=copycols), renames...)
+end
+
 select(df::DataFrame, c::Any; copycols::Bool=true) =
     select(df, index(df)[c], copycols=copycols)
+
 select(df::DataFrame, c, cs...; copycols::Bool=true) =
     select(df, All(c, cs...), copycols=copycols)
 

diff --git a/src/other/index.jl b/src/other/index.jl
@@ -46,14 +46,44 @@ function names!(x::Index, nms::Vector{Symbol}; makeunique::Bool=false)
 end
 
+# Rename columns in place according to `nms`, an iterable of `from => to` pairs.
+# Each name may be renamed at most once and each new name may be used at most
+# once; permutations of existing names (e.g. swapping two names) are allowed.
+# Throws `ArgumentError` on invalid input, in which case `x` is left unchanged.
 function rename!(x::Index, nms)
+    # Validate every pair before mutating `x`, so a failed call needs no rollback
+    renames = Pair{Symbol,Symbol}[]
+    seenfrom = Set{Symbol}()
+    seento = Set{Symbol}()
     for (from, to) in nms
+        if from ∈ seenfrom
+            throw(ArgumentError("Tried renaming $from multiple times."))
+        end
+        if to ∈ seento
+            throw(ArgumentError("Tried renaming to $to multiple times."))
+        end
+        push!(seenfrom, from)
+        push!(seento, to)
         from == to && continue # No change, nothing to do
-        if haskey(x, to)
-            error("Tried renaming $from to $to, when $to already exists in the Index.")
+        if !haskey(x, from)
+            throw(ArgumentError("Tried renaming $from to $to, when $from does not " *
+                                "exist in the Index."))
         end
-        x.lookup[to] = col = pop!(x.lookup, from)
-        x.names[col] = to
+        push!(renames, from => to)
     end
+    # An existing name may only be reused when its own column is renamed away
+    for (_, to) in renames
+        if haskey(x, to) && to ∉ seenfrom
+            throw(ArgumentError("Tried renaming to $to, when it already exists " *
+                                "in the Index."))
+        end
+    end
+    # Detach all old names before attaching new ones so that permutations
+    # (e.g. a swap) cannot clobber a name that is still waiting to be moved
+    cols = Int[pop!(x.lookup, from) for (from, _) in renames]
+    for (col, (_, to)) in zip(cols, renames)
+        x.lookup[to] = col
+        x.names[col] = to
+    end
     return x
 end
 

diff --git a/test/dataframe.jl b/test/dataframe.jl
@@ -617,6 +617,36 @@ end
     @test d.b === df.b
 end
 
+@testset "select! rename" begin
+    df = DataFrame(a=1, b=2, c=3, d=4, e=5)
+    dfa = df.a
+    dfb = df.b
+    dfc = df.c
+    dfd = df.d
+    dfe = df.e
+    # Keep only :a under the new name :b; the column must be the original vector
+    d = copy(df, copycols=false)
+    select!(d, [:a=>:b])
+    @test names(d) == [:b]
+    @test d.b === dfa
+    # Swap the names of :a and :b while also keeping :e via a plain Symbol
+    d = copy(df, copycols=false)
+    select!(d, [:b=>:a, :a=>:b, :e])
+    @test names(d) == [:a, :b, :e]
+    @test d.b === dfa
+    @test d.a === dfb
+    @test d.e === dfe
+    # Rename every column in a single call
+    d = copy(df, copycols=false)
+    select!(d, [:a=>:aa, :b=>:bb, :c=>:cc, :d=>:dd, :e=>:ee])
+    @test names(d) == [:aa, :bb, :cc, :dd, :ee]
+    @test d.aa === dfa
+    @test d.bb === dfb
+    @test d.cc === dfc
+    @test d.dd === dfd
+    @test d.ee === dfe
+end
+
 @testset "select" begin
     df = DataFrame(a=1, b=2, c=3, d=4, e=5)
     @test_throws BoundsError select(df, 0)
@@ -819,6 +849,36 @@ end
     @test d.b === df.b
 end
 
+@testset "select rename" begin
+    df = DataFrame(a=1, b=2, c=3, d=4, e=5)
+    # Default call: the renamed column is equal to, but not the same object as, the source
+    d = select(df, [:a=>:b])
+    @test names(d) == [:b]
+    @test d.b !== df.a
+    @test d.b == df.a
+    # Swap the names of :a and :b and keep :e; all columns are still copies
+    d = select(df, [:b=>:a, :a=>:b, :e])
+    @test names(d) == [:a, :b, :e]
+    @test d.b !== df.a
+    @test d.a !== df.b
+    @test d.e !== df.e
+    @test d.b == df.a
+    @test d.a == df.b
+    @test d.e == df.e
+    # copycols=false: a rename between plain selections, columns shared with `df`
+    d = select(df, [:e, :b=>:a, :c], copycols=false)
+    @test names(d) == [:e, :a, :c]
+    @test d.e === df.e
+    @test d.a === df.b
+    @test d.c === df.c
+    # Rename onto :a while :a itself is renamed away in the same call
+    d = select(df, [:e=>:a, :b, :a=>:c], copycols=false)
+    @test names(d) == [:a, :b, :c]
+    @test d.a === df.e
+    @test d.b === df.b
+    @test d.c === df.a
+end
+
 @testset "deleterows!" begin
     df = DataFrame(a=[1, 2], b=[3.0, 4.0])
     @test deleterows!(df, 1) === df
@@ -1127,6 +1187,33 @@ end
     @test names(df) == [:A_4, :B_4]
     @test rename!(x->Symbol(lowercase(string(x))), df) === df
     @test names(df) == [:a_4, :b_4]
+
+    df = DataFrame(A = 1:3, B = 'A':'C', C = [:x, :y, :z])
+    @test rename!(df, :A => :B, :B => :A) === df
+    @test names(df) == [:B, :A, :C]
+    @test rename!(df, :A => :B, :B => :A, :C => :D) === df
+    @test names(df) == [:A, :B, :D]
+    @test rename!(df, :A => :B, :B => :C, :D => :A) === df
+    @test names(df) == [:B, :C, :A]
+    @test rename!(df, :A => :C, :B => :A, :C => :B) === df
+    @test names(df) == [:A, :B, :C]
+    @test rename!(df, :A => :A, :B => :B, :C => :C) === df
+    @test names(df) == [:A, :B, :C]
+
+    @test_throws ArgumentError rename!(df, :X => :Y)
+    @test names(df) == [:A, :B, :C]
+    @test_throws ArgumentError rename!(df, :A => :X, :X => :Y)
+    @test names(df) == [:A, :B, :C]
+    @test_throws ArgumentError rename!(df, :A => :B)
+    @test names(df) == [:A, :B, :C]
+    @test_throws ArgumentError rename!(df, :A => :X, :A => :X)
+    @test names(df) == [:A, :B, :C]
+    @test_throws ArgumentError rename!(df, :A => :X, :B => :X)
+    @test names(df) == [:A, :B, :C]
+    @test_throws ArgumentError rename!(df, :A => :B, :B => :A, :C => :B)
+    @test names(df) == [:A, :B, :C]
+    @test_throws ArgumentError rename!(df, :A => :B, :B => :A, :A => :X)
+    @test names(df) == [:A, :B, :C]
 end
 
 @testset "size" begin
@@ -1527,6 +1614,17 @@ end
     @test df[:, [:y,:x]][!, :x] !== x
 end
 
+@testset "test getindex with rename" begin
+    x = [1,3]
+    y = [2,4]
+    df = DataFrame(x=x, y=y, copycols=false)
+    @test df[!, [:x=>:t]].t === x  # `!` rows: renamed column is the very same vector
+    @test df[:, [:x=>:t]].t == x  # `:` rows: renamed column has equal values...
+    @test df[:, [:x=>:t]].t !== x  # ...but is a distinct vector
+    @test df[1:1, [:x=>:t]].t == x[1:1]  # integer range as row selector
+    @test df[Not(2), [:x=>:t]].t == x[Not(2)]  # inverted (`Not`) row selector
+end
+
 @testset "test corner case of getindex" begin
     df = DataFrame(x=[1], y=[1])
     @test_throws ArgumentError df[true, 1:2]