JuliaData · pdeffebach · Oct 18, 2020 · Oct 10, 2020 · Oct 10, 2020 · Oct 13, 2020
diff --git a/NEWS.md b/NEWS.md
@@ -4,5 +4,6 @@
   order of rows returned after `DataFrames.transform(gd::GroupedDataFrame, args...)`. 
 * `@select` now supports `GroupedDataFrame` with the same behavior as 
   `DataFrames.select(df::GroupedDataFrame, args...)` ([#180])
+* `@orderby(gd::GroupedDataFrame, args...)` is now reserved and will error.
 * Restrictions are imposed on the types of column references allowed when using `cols`. 
-  Mixing integer column references with other types now errors. ([#183])
+  Mixing integer column references with other types now errors. ([#183])
diff --git a/README.md b/README.md
@@ -124,6 +124,17 @@ df2 = @byrow df begin
 end
 ```
 
+## `@orderby`
+
+Sort rows in a `DataFrame` by values in one or several columns or by a
+transformation of columns.
+
+```julia
+d = DataFrame(x = [3, 3, 3, 2, 1, 1, 1, 2, 1, 1], n = 1:10);
+@orderby(d, -1 .* :n)
+@orderby(d, :x, :n .- mean(:n))
+```
+
 ## Working with column names programmatically with `cols`
 
 DataFramesMeta.jl provides the special syntax `cols` for referring to 
@@ -320,7 +331,7 @@ The following operations are now included:
   GroupedDataFrame.
 
 - `orderby(g, d -> mean(d[:a]))` and `@orderby(g, mean(:a))` -- Sort
-  groups based on the given criteria. Returns a GroupedDataFrame.
+  rows by the given criteria. Returns a `DataFrame`.
 
 - `DataFrame(g)` -- Convert groups back to a DataFrame with the same
   group orderings.

diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl
@@ -123,10 +123,10 @@ function fun_to_vec(kw::Expr; nolhs = false)
             end
          else
             if kw.args[1] isa Symbol
-                # cols(n) = f(:x) becomes [:x] => _f => n
+                # y = f(:x) becomes [:x] => _f => :y
                 output = QuoteNode(kw.args[1])
             elseif onearg(kw.args[1], :cols)
-                # y = f(:x) becomes [:x] => _f => :y
+                # cols(n) = f(:x) becomes [:x] => _f => n
                 output = kw.args[1].args[2]
             end
             t = quote
@@ -141,7 +141,7 @@ function fun_to_vec(kw::Expr; nolhs = false)
     end
 end
 
-fun_to_vec(kw::QuoteNode) = kw
+fun_to_vec(kw::QuoteNode; nolhs = false) = kw
 
 function make_source_concrete(x::AbstractVector)
     if isempty(x) || isconcretetype(eltype(x))
@@ -397,76 +397,78 @@ end
 ##
 ##############################################################################
 
-# needed on Julia 1.0 till #1489 in DataFrames is merged
-orderby(d::DataFrame, arg::DataFrame) = d[sortperm(arg), :]
-
-function orderby(d::AbstractDataFrame, args...)
-    D = typeof(d)(args...)
-    d[sortperm(D), :]
+function orderby_helper(x, args...)
+    t = (fun_to_vec(arg; nolhs = true) for arg in args)
+    quote
+        $DataFramesMeta.orderby($x, $(t...))
+    end
 end
 
-orderby(d::AbstractDataFrame, f::Function) = d[sortperm(f(d)), :]
-orderby(g::GroupedDataFrame, f::Function) = g[sortperm([f(x) for x in g])]
-
-orderbyconstructor(d::AbstractDataFrame) = (x...) -> DataFrame(Any[x...], Symbol.(1:length(x)))
-orderbyconstructor(d) = x -> x
+function orderby(x::AbstractDataFrame, @nospecialize(args...))
+    t = DataFrames.select(x, args...; copycols = false)
+    x[sortperm(t), :]
+end
 
-function orderby_helper(d, args...)
-    _D = gensym()
-    quote
-        let $_D = $d
-            $orderby($_D, $(with_anonymous(:($orderbyconstructor($_D)($(args...))))))
-        end
-    end
+function orderby(x::GroupedDataFrame, @nospecialize(args...))
+    throw(ArgumentError("@orderby with a GroupedDataFrame is reserved"))
 end
 
 """
     @orderby(d, i...)
 
-Sort by criteria. Normally used to sort groups in GroupedDataFrames.
+Sort rows by values in one or several columns or by a transformation of columns.
+Always returns a fresh `DataFrame`. Does not accept a `GroupedDataFrame`.
+
+When given a `DataFrame`, `@orderby` applies the transformation
+given by its arguments (but does not create new columns) and sorts
+the given `DataFrame` on the result, returning a new `DataFrame`.
 
 ### Arguments
 
-* `d` : an AbstractDataFrame or GroupedDataFrame
+* `d` : an AbstractDataFrame
 * `i...` : expression for sorting
 
 ### Examples
 
 ```jldoctest
 julia> using DataFrames, DataFramesMeta, Statistics
 
-julia> d = DataFrame(n = 1:20, x = [3, 3, 3, 3, 1, 1, 1, 2, 1, 1,
-                                    2, 1, 1, 2, 2, 2, 3, 1, 1, 2]);
-
-julia> g = groupby(d, :x);
-
-julia> @orderby(g, mean(:n))
-GroupedDataFrame  3 groups with keys: Symbol[:x]
-First Group:
-5×2 SubDataFrame{Array{Int64,1}}
-│ Row │ n  │ x │
-├─────┼────┼───┤
-│ 1   │ 1  │ 3 │
-│ 2   │ 2  │ 3 │
-│ 3   │ 3  │ 3 │
-│ 4   │ 4  │ 3 │
-│ 5   │ 17 │ 3 │
-⋮
-Last Group:
-6×2 SubDataFrame{Array{Int64,1}}
-│ Row │ n  │ x │
-├─────┼────┼───┤
-│ 1   │ 8  │ 2 │
-│ 2   │ 11 │ 2 │
-│ 3   │ 14 │ 2 │
-│ 4   │ 15 │ 2 │
-│ 5   │ 16 │ 2 │
-│ 6   │ 20 │ 2 │
+julia> d = DataFrame(x = [3, 3, 3, 2, 1, 1, 1, 2, 1, 1], n = 1:10);
+
+julia> @orderby(d, -1 .* :n)
+10×2 DataFrame
+│ Row │ x     │ n     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 10    │
+│ 2   │ 1     │ 9     │
+│ 3   │ 2     │ 8     │
+│ 4   │ 1     │ 7     │
+│ 5   │ 1     │ 6     │
+│ 6   │ 1     │ 5     │
+│ 7   │ 2     │ 4     │
+│ 8   │ 3     │ 3     │
+│ 9   │ 3     │ 2     │
+│ 10  │ 3     │ 1     │
+
+julia> @orderby(d, :x, :n .- mean(:n))
+10×2 DataFrame
+│ Row │ x     │ n     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 5     │
+│ 2   │ 1     │ 6     │
+│ 3   │ 1     │ 7     │
+│ 4   │ 1     │ 9     │
+│ 5   │ 1     │ 10    │
+│ 6   │ 2     │ 4     │
+│ 7   │ 2     │ 8     │
+│ 8   │ 3     │ 1     │
+│ 9   │ 3     │ 2     │
+│ 10  │ 3     │ 3     │
 ```
-
 """
 macro orderby(d, args...)
-    # I don't esc just the input because I want _DF to be visible to the user
     esc(orderby_helper(d, args...))
 end
 

diff --git a/test/dataframes.jl b/test/dataframes.jl
@@ -277,7 +277,21 @@ end
     @test  @where(df, :A .> 1, :A .< 4, :B .> 1) == df[map(&, df.A .> 1, df.A .< 4, df.B .> 1),:]
 end
 
+@testset "orderby" begin
+    df = DataFrame(
+        g = [1, 1, 1, 2, 2],
+        i = 1:5,
+        t = ["a", "b", "c", "c", "e"],
+        y = [:v, :w, :x, :y, :z],
+        c = [:g, :quote, :body, :transform, missing]
+        )
+
+    gd = groupby(df, :g)
+
+    @test @orderby(df, :c).i == [3, 1, 2, 4, 5]
+    @test @orderby(df, -:g).i == [4, 5, 1, 2, 3]
+    @test @orderby(df, :t).i == [1, 2, 3, 4, 5]
+end
 
-@test DataFramesMeta.orderby(df, df[[1, 3, 2], :]) == df[[1, 3, 2], :]
 
 end # module
diff --git a/test/grouping.jl b/test/grouping.jl
@@ -14,11 +14,8 @@ g = groupby(d, :x, sort=true)
 @test  DataFrame(@where(g, length(:x) > 5)) == DataFrame(DataFramesMeta.where(g, x -> length(x.x) > 5))
 @test  DataFrame(@where(g, length(:x) > 5))[!, :n][1:3] == [5, 6, 7]
 
-@test  DataFrame(DataFramesMeta.orderby(g, x -> mean(x.n))) == DataFrame(@orderby(g, mean(:n)))
-
 @test @based_on(g, nsum = sum(:n)).nsum == [99, 84, 27]
 
-
 @testset "@based_on" begin
     df = DataFrame(
         g = [1, 1, 1, 2, 2],

diff --git a/test/linqmacro.jl b/test/linqmacro.jl
@@ -6,28 +6,26 @@ using DataFramesMeta
 using Statistics
 using Random
 
-Random.seed!(100)
-n = 100
-df = DataFrame(a = rand(1:3, n),
-               b = ["a","b","c","d"][rand(1:4, n)],
-               x = rand(n))
+df = DataFrame(a = repeat(1:5, outer = 20),
+               b = repeat(["a", "b", "c", "d"], inner = 25),
+               x = repeat(1:20, inner = 5))
 
 x = @where(df, :a .> 2, :b .!= "c")
 x = @transform(x, y = 10 * :x)
+x = @orderby(x, :x .- mean(:x))
 x = @by(x, :b, meanX = mean(:x), meanY = mean(:y))
-x = @orderby(x, -:meanX)
 x = @select(x, var = :b, :meanX, :meanY)
 
 x1 = @linq transform(where(df, :a .> 2, :b .!= "c"), y = 10 * :x)
-x1 = @linq by(x1, :b, meanX = mean(:x), meanY = mean(:y))
-x1 = @linq select(orderby(x1, -:meanX), var = :b, :meanX, :meanY)
+x1 = @linq by(orderby(x1, :x .- mean(:x)), :b, meanX = mean(:x), meanY = mean(:y))
+x1 = @linq select(x1, var = :b, :meanX, :meanY)
 
 ## chaining
 xlinq = @linq df  |>
     where(:a .> 2, :b .!= "c")  |>
     transform(y = 10 * :x)  |>
+    orderby(:x .- mean(:x)) |>
     by(:b, meanX = mean(:x), meanY = mean(:y))  |>
-    orderby(-:meanX)  |>
     select(var = :b, :meanX, :meanY)
 
 @test x == x1
@@ -36,17 +34,17 @@ xlinq = @linq df  |>
 xlinq2 = @linq df  |>
     where(:a .> 2, :b .!= "c")  |>
     transform(y = 10 * :x)  |>
+    orderby(:x .- mean(:x)) |>
     groupby(:b) |>
-    orderby(-mean(:x))  |>
     based_on(meanX = mean(:x), meanY = mean(:y))
 
 @test xlinq2[!, [:meanX, :meanY]] == xlinq[!, [:meanX, :meanY]]
 
 xlinq3 = @linq df  |>
     where(:a .> 2, :b .!= "c")  |>
     transform(y = 10 * :x)  |>
+    orderby(:x .- mean(:x)) |>
     DataFrames.groupby(:b) |>
-    orderby(-mean(:x))  |>
     based_on(meanX = mean(:x), meanY = mean(:y))
 
 @test xlinq3[!, [:meanX, :meanY]] == xlinq[!, [:meanX, :meanY]]
@@ -68,8 +66,8 @@ xlinq3 = @linq df  |>
     xlinq3 = @linq df  |>
         where(cols(a_sym) .> 2, :b .!= "c")  |>
         transform(cols(y_str) = 10 * cols(x_sym))  |>
-        DataFrames.groupby(b_str) |>
-        orderby(-mean(cols(x_sym)))  |>
+        orderby(cols(x_sym) .- mean(cols(x_sym)))  |>
+        groupby(b_str) |>
         based_on(cols("meanX") = mean(:x), meanY = mean(:y))
 
     @test isequal(xlinq3, DataFrame(b = "d", meanX = 40.0, meanY = 400.0))