JuliaData · bkamins · Feb 20, 2022 · Feb 18, 2022 · Feb 18, 2022 · Feb 18, 2022
diff --git a/NEWS.md b/NEWS.md
@@ -12,6 +12,9 @@
 * Add special syntax for `eachindex`, `groupindices`, and `proprow`
   to transformation mini-language
   ([#3001](https://github.com/JuliaData/DataFrames.jl/pull/3001)).
+* Add support for `reverse!`, `permute!`, `invpermute!`, `shuffle`,
+  and `shuffle!` functions. Add optional `start` and `stop` arguments to `reverse`
+  ([#3010](https://github.com/JuliaData/DataFrames.jl/pull/3010)).
 * `first` and `last` for `GroupedDataFrame` now support passing number of elements to get
   ([#3006](https://github.com/JuliaData/DataFrames.jl/issues/3006))
 

diff --git a/Project.toml b/Project.toml
@@ -14,6 +14,7 @@ Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
 PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SortingAlgorithms = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
@@ -47,13 +48,12 @@ DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
 ShiftedArrays = "1277b4bf-5013-50f5-be3d-901d8477a67a"
 
 [targets]
 test = ["CategoricalArrays", "Combinatorics", "DataStructures", "DataValues",
-        "Dates", "Logging", "OffsetArrays", "Random", "Test", "Unitful",
+        "Dates", "Logging", "OffsetArrays", "Test", "Unitful",
         "ShiftedArrays", "SparseArrays"]
diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md
@@ -74,15 +74,20 @@ combine
 flatten
 hcat
 insertcols!
+invpermute!
 mapcols
 mapcols!
+permute!
 push!
 reduce
 repeat
 repeat!
 reverse
+reverse!
 select
 select!
+shuffle
+shuffle!
 transform
 transform!
 vcat

diff --git a/src/DataFrames.jl b/src/DataFrames.jl
@@ -8,6 +8,7 @@ using TableTraits, IteratorInterfaceExtensions
 import LinearAlgebra: norm
 using Markdown
 using PrettyTables
+using Random
 
 import DataAPI,
        DataAPI.All,

diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -2261,9 +2261,10 @@ Base.setindex!(::AbstractDataFrame, ::Any, ::Union{Symbol, Integer, AbstractStri
     throw(ArgumentError("syntax df[column] is not supported use df[!, column] instead"))
 
 """
-    reverse(df::AbstractDataFrame)
+    reverse(df::AbstractDataFrame, start=1, stop=nrow(df))
 
 Return a data frame containing the rows in `df` in reversed order.
+If `start` and `stop` are provided, only rows in the `start:stop` range are affected.
 
 # Examples
 
@@ -2289,6 +2290,257 @@ julia> reverse(df)
    3 │     3      8     13
    4 │     2      7     12
    5 │     1      6     11
+
+julia> reverse(df, 2, 3)
+5×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      6     11
+   2 │     3      8     13
+   3 │     2      7     12
+   4 │     4      9     14
+   5 │     5     10     15
 ```
 """
-Base.reverse(df::AbstractDataFrame) = df[nrow(df):-1:1, :]
+function Base.reverse(df::AbstractDataFrame, start::Integer=1, stop::Integer=nrow(df))
+    # Apply the non-mutating vector `reverse` to every column on the same
+    # `start:stop` range; `mapcols` assembles the result.
+    reversed_column(col) = reverse(col, start, stop)
+    return mapcols(reversed_column, df)
+end
+
+"""
+    reverse!(df::AbstractDataFrame, start=1, stop=nrow(df))
+
+Mutate data frame in-place to reverse its row order.
+If `start` and `stop` are provided, only rows in the `start:stop` range are affected.
+
+`reverse!` will produce a correct result even if some columns of passed data frame
+are identical (checked with `===`). Otherwise, if two columns share some part of
+memory but are not identical (e.g. are different views of the same parent
+vector) then `reverse!` result might be incorrect.
+
+# Examples
+
+```jldoctest
+julia> df = DataFrame(a=1:5, b=6:10, c=11:15)
+5×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      6     11
+   2 │     2      7     12
+   3 │     3      8     13
+   4 │     4      9     14
+   5 │     5     10     15
+
+julia> reverse!(df)
+5×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     5     10     15
+   2 │     4      9     14
+   3 │     3      8     13
+   4 │     2      7     12
+   5 │     1      6     11
+
+julia> reverse!(df, 2, 3)
+5×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     5     10     15
+   2 │     3      8     13
+   3 │     4      9     14
+   4 │     2      7     12
+   5 │     1      6     11
+```
+"""
+function Base.reverse!(df::AbstractDataFrame, start::Integer=1, stop::Integer=nrow(df))
+    # Columns are tracked by identity (`===`): a vector stored under several
+    # names must be reversed exactly once, as a second pass would undo the first.
+    visited = IdDict{Any, Nothing}()
+    for col in eachcol(df)
+        haskey(visited, col) && continue
+        visited[col] = nothing
+        reverse!(col, start, stop)
+    end
+    return df
+end
+
+function _permutation_helper!(fun::Union{typeof(Base.permute!!), typeof(Base.invpermute!!)},
+                              df::AbstractDataFrame, p::AbstractVector{<:Integer})
+    # Apply `fun` (`Base.permute!!` or `Base.invpermute!!`) with permutation `p`
+    # to every distinct column of `df` in-place and return `df`.
+
+    # Reject a wrong-length `p` before any column is touched: otherwise `fun`
+    # may throw after part of `df` has already been mutated, leaving its
+    # columns permuted inconsistently with each other.
+    nrow(df) == length(p) ||
+        throw(DimensionMismatch("Permutation does not have a correct length " *
+                                "(expected $(nrow(df)) but got $(length(p)))"))
+
+    # `fun` is only ever handed the scratch buffer `pp`, which is refilled from
+    # `p` before each call, so `p` itself is never passed to `fun`
+    pp = similar(p)
+    # columns are tracked by identity so that a vector stored under several
+    # names is permuted exactly once
+    seen_cols = IdDict{Any, Nothing}()
+    for col in eachcol(df)
+        haskey(seen_cols, col) && continue
+        seen_cols[col] = nothing
+        # p might be a column of df so we make sure we unalias it before
+        # that column gets permuted
+        if col === p
+            p = copy(p)
+        end
+        copyto!(pp, p)
+        fun(col, pp)
+    end
+    return df
+end
+
+"""
+    permute!(df::AbstractDataFrame, p)
+
+Permute data frame `df` in-place, according to permutation `p`.
+No checking is done to verify that `p` is a permutation.
+
+To return a new data frame instead of permuting `df` in-place, use `df[p, :]`.
+Note that this is generally faster than `permute!(df, p)` for large data frames.
+
+`permute!` will produce a correct result even if some columns of passed data frame
+or permutation `p` are identical (checked with `===`). Otherwise, if two columns share
+some part of memory but are not identical (e.g. are different views of the same parent
+vector) then `permute!` result might be incorrect.
+
+# Examples
+
+```jldoctest
+julia> df = DataFrame(a=1:5, b=6:10, c=11:15)
+5×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      6     11
+   2 │     2      7     12
+   3 │     3      8     13
+   4 │     4      9     14
+   5 │     5     10     15
+
+julia> permute!(df, [5, 3, 1, 2, 4])
+5×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     5     10     15
+   2 │     3      8     13
+   3 │     1      6     11
+   4 │     2      7     12
+   5 │     4      9     14
+```
+"""
+function Base.permute!(df::AbstractDataFrame, p::AbstractVector{<:Integer})
+    # handling of duplicated columns and of `p` aliasing a column is done
+    # by the shared helper
+    return _permutation_helper!(Base.permute!!, df, p)
+end
+
+"""
+    invpermute!(df::AbstractDataFrame, p)
+
+Like [`permute!`](@ref), but the inverse of the given permutation is applied.
+
+`invpermute!` will produce a correct result even if some columns of passed data
+frame or permutation `p` are identical (checked with `===`). Otherwise, if two
+columns share some part of memory but are not identical (e.g. are different views
+of the same parent vector) then `invpermute!` result might be incorrect.
+
+# Examples
+
+```jldoctest
+julia> df = DataFrame(a=1:5, b=6:10, c=11:15)
+5×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      6     11
+   2 │     2      7     12
+   3 │     3      8     13
+   4 │     4      9     14
+   5 │     5     10     15
+
+julia> permute!(df, [5, 3, 1, 2, 4])
+5×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     5     10     15
+   2 │     3      8     13
+   3 │     1      6     11
+   4 │     2      7     12
+   5 │     4      9     14
+
+julia> invpermute!(df, [5, 3, 1, 2, 4])
+5×3 DataFrame
+ Row │ a      b      c
+     │ Int64  Int64  Int64
+─────┼─────────────────────
+   1 │     1      6     11
+   2 │     2      7     12
+   3 │     3      8     13
+   4 │     4      9     14
+   5 │     5     10     15
+```
+"""
+function Base.invpermute!(df::AbstractDataFrame, p::AbstractVector{<:Integer})
+    # handling of duplicated columns and of `p` aliasing a column is done
+    # by the shared helper
+    return _permutation_helper!(Base.invpermute!!, df, p)
+end
+
+"""
+    shuffle([rng=GLOBAL_RNG,] df::AbstractDataFrame)
+
+Return a copy of `df` with randomly permuted rows.
+The optional `rng` argument specifies a random number generator.
+
+# Examples
+
+```julia-repl
+julia> rng = MersenneTwister(1234);
+
+julia> shuffle(rng, DataFrame(a=1:5, b=1:5))
+5×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     2      2
+   2 │     1      1
+   3 │     4      4
+   4 │     3      3
+   5 │     5      5
+```
+"""
+function Random.shuffle(df::AbstractDataFrame)
+    # no generator supplied: `randperm` is called without an `rng` argument
+    row_order = randperm(nrow(df))
+    return df[row_order, :]
+end
+function Random.shuffle(r::AbstractRNG, df::AbstractDataFrame)
+    # the row order is drawn from the caller-supplied generator `r`
+    row_order = randperm(r, nrow(df))
+    return df[row_order, :]
+end
+
+"""
+    shuffle!([rng=GLOBAL_RNG,] df::AbstractDataFrame)
+
+Randomly permute rows of `df` in-place.
+The optional `rng` argument specifies a random number generator.
+
+`shuffle!` will produce a correct result even if some columns of passed data frame
+are identical (checked with `===`). Otherwise, if two columns share some part of
+memory but are not identical (e.g. are different views of the same parent
+vector) then `shuffle!` result might be incorrect.
+
+# Examples
+
+```julia-repl
+julia> rng = MersenneTwister(1234);
+
+julia> shuffle!(rng, DataFrame(a=1:5, b=1:5))
+5×2 DataFrame
+ Row │ a      b
+     │ Int64  Int64
+─────┼──────────────
+   1 │     2      2
+   2 │     1      1
+   3 │     4      4
+   4 │     3      3
+   5 │     5      5
+```
+"""
+function Random.shuffle!(df::AbstractDataFrame)
+    # no generator supplied: `randperm` is called without an `rng` argument
+    row_order = randperm(nrow(df))
+    return permute!(df, row_order)
+end
+function Random.shuffle!(r::AbstractRNG, df::AbstractDataFrame)
+    # the row order is drawn from the caller-supplied generator `r`
+    row_order = randperm(r, nrow(df))
+    return permute!(df, row_order)
+end
diff --git a/src/abstractdataframe/sort.jl b/src/abstractdataframe/sort.jl
@@ -682,25 +682,5 @@ function Base.sort!(df::AbstractDataFrame, cols=All();
     return sort!(df, _alg, ord)
 end
 
-function Base.sort!(df::AbstractDataFrame, a::Base.Sort.Algorithm, o::Base.Sort.Ordering)
-    toskip = Set{Int}()
-    seen_cols = IdDict{Any, Nothing}()
-    for (i, col) in enumerate(eachcol(df))
-        if haskey(seen_cols, col)
-            push!(toskip, i)
-        else
-            seen_cols[col] = nothing
-        end
-    end
-
-    p = _sortperm(df, a, o)
-    pp = similar(p)
-
-    for (i, col) in enumerate(eachcol(df))
-        if !(i in toskip)
-            copyto!(pp, p)
-            Base.permute!!(col, pp)
-        end
-    end
-    return df
-end
+function Base.sort!(df::AbstractDataFrame, a::Base.Sort.Algorithm, o::Base.Sort.Ordering)
+    # compute the sorting permutation once, then let `permute!` reorder
+    # the columns of `df` in-place
+    row_order = _sortperm(df, a, o)
+    return permute!(df, row_order)
+end