From 870c0e6ec27514d19bcda8c778e1b45666d7cd3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Mon, 19 Dec 2022 14:31:46 +0100
Subject: [PATCH 01/14] add sorting order to groupby

---
 NEWS.md                                  |  2 +
 docs/src/man/split_apply_combine.md      | 64 ++++++++++++++++++++++++
 src/DataFrames.jl                        |  4 +-
 src/groupeddataframe/groupeddataframe.jl | 45 +++++++++++++----
 test/grouping.jl                         |  5 ++
 5 files changed, 109 insertions(+), 11 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 7a81f0eef2..d1b6cf7a4e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -10,6 +10,8 @@
 * Add support for `operator` keyword argument in `Cols`
   to take a set operation to apply to passed selectors (`union` by default)
   ([3224](https://github.com/JuliaData/DataFrames.jl/pull/3224))
+* Improve support for setting group order in `groupby`
+  ([3253](https://github.com/JuliaData/DataFrames.jl/pull/3253))
 
 # DataFrames.jl v1.4.4 Patch Release Notes
 
diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md
index 8961744a89..31aa4e468e 100644
--- a/docs/src/man/split_apply_combine.md
+++ b/docs/src/man/split_apply_combine.md
@@ -1276,3 +1276,67 @@ two aspects:
   are exceptions to the standard operation specification syntax rules. They
   were added for user convenience.
 
+## Specifying group order in `groupby`
+
+By default order of groups produced by `groupby` is undefined.
+If you want the order of groups to follow the order of first appereance in
+the source data frame of a grouping key then pass `sort=false` keyword argument
+to `groupby`:
+
+```jldoctest sac
+julia> df
+6×3 DataFrame
+ Row │ customer_id  transaction_id  volume 
+     │ String       Int64           Int64
+─────┼─────────────────────────────────────
+   1 │ a                        12       2
+   2 │ b                        15       3
+   3 │ b                        19       1
+   4 │ b                        17       4
+   5 │ c                        13       5
+   6 │ c                        11       9
+
+julia> keys(groupby(df, :volume))
+6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
+ GroupKey: (volume = 2,)
+ GroupKey: (volume = 3,)
+ GroupKey: (volume = 1,)
+ GroupKey: (volume = 4,)
+ GroupKey: (volume = 5,)
+ GroupKey: (volume = 9,)
+```
+
+If you want to have them sorted in ascending order pass `sort=true`:
+
+```
+julia> keys(groupby(df, :volume, sort=true))
+6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
+ GroupKey: (volume = 1,)
+ GroupKey: (volume = 2,)
+ GroupKey: (volume = 3,)
+ GroupKey: (volume = 4,)
+ GroupKey: (volume = 5,)
+ GroupKey: (volume = 9,)
+```
+
+You can also use [`order`](@ref) wrapper when passing a column name to group by
+or pass a named tuple containing one or more of `alg`, `lt`, `by`, `rev`, and
+`order` fields that will be treated just like in [`sortperm`](@ref):
+
+```
+julia> keys(groupby(df, :customer_id, sort=(rev=true,)))
+3-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
+ GroupKey: (customer_id = "c",)
+ GroupKey: (customer_id = "b",)
+ GroupKey: (customer_id = "a",)
+
+julia> keys(groupby(df, [:customer_id, order(:volume, rev=true)], sort=true))
+6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
+ GroupKey: (customer_id = "a", volume = 2)
+ GroupKey: (customer_id = "b", volume = 4)
+ GroupKey: (customer_id = "b", volume = 3)
+ GroupKey: (customer_id = "b", volume = 1)
+ GroupKey: (customer_id = "c", volume = 9)
+ GroupKey: (customer_id = "c", volume = 5)
+
+```
diff --git a/src/DataFrames.jl b/src/DataFrames.jl
index 4df4b26750..c5d8366214 100644
--- a/src/DataFrames.jl
+++ b/src/DataFrames.jl
@@ -139,6 +139,8 @@ include("subdataframe/subdataframe.jl")
 include("dataframerow/dataframerow.jl")
 include("dataframe/insertion.jl")
 
+include("abstractdataframe/sort.jl")
+
 include("groupeddataframe/groupeddataframe.jl")
 include("groupeddataframe/utils.jl")
 
@@ -165,8 +167,6 @@ include("groupeddataframe/show.jl")
 include("dataframerow/show.jl")
 include("abstractdataframe/io.jl")
 
-include("abstractdataframe/sort.jl")
-
 include("other/tables.jl")
 include("other/names.jl")
 include("other/metadata.jl")
diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index 79ce980f75..80a7eeffbd 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -49,7 +49,7 @@ end
 
 """
     groupby(d::AbstractDataFrame, cols;
-            sort::Union{Bool, Nothing}=nothing,
+            sort::Union{Bool, Nothing, NamedTuple}=nothing,
             skipmissing::Bool=false)
 
 Return a `GroupedDataFrame` representing a view of an `AbstractDataFrame` split
@@ -64,18 +64,23 @@ into row groups.
   if `sort=nothing` (the default) then the fastest available grouping algorithm
   is picked and in consequence the order of groups in the result is undefined
   and may change in future releases; below a description of the current
-  implementation is provided.
+  implementation is provided. Additionally `sort` can be a `NamedTuple` having
+  some or all of `alg`, `lt`, `by`, `rev`, and `order` fields. In this case
+  the groups are sorted and their order follows the [`sortperm`](@ref) order.
 - `skipmissing` : whether to skip groups with `missing` values in one of the
   grouping columns `cols`
 
 # Details
-An iterator over a `GroupedDataFrame` returns a `SubDataFrame` view
+, NamedTupleAn iterator over a `GroupedDataFrame` returns a `SubDataFrame` view
 for each grouping into `df`.
 Within each group, the order of rows in `df` is preserved.
 
 `cols` can be any valid data frame indexing expression.
 In particular if it is an empty vector then a single-group `GroupedDataFrame`
-is created.
+is created. As a special case, if a list of columns to group by is passed
+as a vector it can contain columns wrapped in [`order`](@ref) that will be
+used to determine order of groups if `sort` is `true` or a `NamedTuple` (if
+`sort` is `nothing` or `false`, then passing `order` is an error).
 
 A `GroupedDataFrame` also supports indexing by groups, `select`, `transform`,
 and `combine` (which applies a function to each group and combines the result
@@ -209,9 +214,9 @@ julia> for g in gd
 ```
 """
 function groupby(df::AbstractDataFrame, cols;
-                 sort::Union{Bool,Nothing}=nothing, skipmissing::Bool=false)
+                 sort::Union{Bool,Nothing,NamedTuple}=nothing, skipmissing::Bool=false)
     _check_consistency(df)
-    idxcols = index(df)[cols]
+    idxcols = index(df)[normalize_grouping_cols(cols, sort === true || sort isa NamedTuple)]
     if isempty(idxcols)
         return GroupedDataFrame(df, Symbol[], ones(Int, nrow(df)),
                                 nothing, nothing, nothing, nrow(df) == 0 ? 0 : 1,
@@ -222,17 +227,19 @@ function groupby(df::AbstractDataFrame, cols;
     groups = Vector{Int}(undef, nrow(df))
     ngroups, rhashes, gslots, sorted =
         row_group_slots(ntuple(i -> sdf[!, i], ncol(sdf)), Val(false),
-                        groups, skipmissing, sort)
+                        groups, skipmissing, sort === true)
 
     gd = GroupedDataFrame(df, copy(_names(sdf)), groups, nothing, nothing, nothing, ngroups, nothing,
                           Threads.ReentrantLock())
 
     # sort groups if row_group_slots hasn't already done that
-    if sort === true && !sorted
+    if (sort === true && !sorted) || (sort isa NamedTuple)
         # Find index of representative row for each group
         idx = Vector{Int}(undef, length(gd))
         fillfirst!(nothing, idx, 1:nrow(parent(gd)), gd)
-        group_invperm = invperm(sortperm(view(parent(gd)[!, gd.cols], idx, :)))
+        sort_kwargs = sort isa NamedTuple ? sort : NamedTuple()
+        group_invperm = invperm(sortperm(view(parent(gd), idx, :),
+                                         cols; sort_kwargs...))
         groups = gd.groups
         @inbounds for i in eachindex(groups)
             gix = groups[i]
@@ -243,6 +250,26 @@ function groupby(df::AbstractDataFrame, cols;
     return gd
 end
 
+normalize_grouping_cols(cols, sort::Bool) = cols
+
+function normalize_grouping_cols(cols::UserColOrdering, sort::Bool)
+    sort || throw(ArgumentError("passing `order` is only allowed if `sort` " *
+                                "is `true` or a `NamedTuple`"))
+    return cols.col
+end
+
+function normalize_grouping_cols(cols::AbstractVector, sort::Bool)
+    has_order = any(x -> x isa UserColOrdering, cols)
+    if has_order
+        sort || throw(ArgumentError("passing `order` is only allowed if `sort` " *
+                                    "is `true` or a `NamedTuple`"))
+        return Any[x isa UserColOrdering ? x.col : x for x in cols]
+    else
+        return cols
+    end
+    return cols.col
+end
+
 function genkeymap(gd, cols)
     # currently we use Dict{Any, Int} because then field :keymap in GroupedDataFrame
     # has a concrete type which makes the access to it faster as we do not have a dynamic
diff --git a/test/grouping.jl b/test/grouping.jl
index 670179ff87..2c3410163c 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -4378,4 +4378,9 @@ end
     @test_throws ArgumentError combine(gdf, :a => (x -> [Dict('x' => 1)]) => AsTable)
 end
 
+@testset "sorting API" begin
+    Random.seed!(1234)
+    df = DataFrame(a=rand(-10, 10, 100), b=randperm(100))
+end
+
 end # module

From 806f87831e28ac3497d396b90083a3740e57fcc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Mon, 19 Dec 2022 14:32:29 +0100
Subject: [PATCH 02/14] add tests

---
 test/grouping.jl | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/test/grouping.jl b/test/grouping.jl
index 2c3410163c..3549d1bcc6 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -4380,7 +4380,43 @@ end
 
 @testset "sorting API" begin
     Random.seed!(1234)
-    df = DataFrame(a=rand(-10, 10, 100), b=randperm(100))
+    df = DataFrame(a=rand(-10:10, 100), b=rand(-10:10, 100), c=1:100)
+    for col in (:a, "a", 1, :b, "b", 2, :c, "c", 3) 
+        gdf = groupby(df, col, sort=true)
+        @test issorted(DataFrame(gdf)[:, col])
+        @test all(x -> issorted(x.c), gdf)
+        gdf = groupby(df, col, sort=NamedTuple())
+        @test issorted(DataFrame(gdf)[:, col])
+        @test all(x -> issorted(x.c), gdf)
+        gdf = groupby(df, col, sort=(rev=true,))
+        @test issorted(DataFrame(gdf)[:, col], rev=true)
+        @test all(x -> issorted(x.c), gdf)
+        gdf = groupby(df, order(col, by=abs), sort=(rev=true,))
+        @test issorted(DataFrame(gdf)[:, col], rev=true, by=abs)
+        @test all(x -> issorted(x.c), gdf)
+    end
+
+    gdf = groupby(df, [:a, :b], sort=true)
+    @test issorted(DataFrame(gdf), [:a, :b])
+    @test all(x -> issorted(x.c), gdf)
+    gdf = groupby(df, [:a, :b], sort=NamedTuple())
+    @test issorted(DataFrame(gdf), [:a, :b])
+    @test all(x -> issorted(x.c), gdf)
+    gdf = groupby(df, [:a, :b], sort=(rev=true,))
+    @test issorted(DataFrame(gdf), [:a, :b], rev=true)
+    @test all(x -> issorted(x.c), gdf)
+    gdf = groupby(df, [order(:a, by=abs), :b], sort=(rev=true,))
+    @test issorted(DataFrame(gdf), [order(:a, by=abs), :b], rev=true)
+    @test all(x -> issorted(x.c), gdf)
+    gdf = groupby(df, [:a, order(:b, rev=false)], sort=(rev=true,))
+    @test issorted(DataFrame(gdf), [:a, order(:b, rev=false)], rev=true)
+    @test all(x -> issorted(x.c), gdf)
+
+    @test_throws ArgumentError groupby(df, order(:a))
+    @test_throws ArgumentError groupby(df, order(:a), sort=false)
+    @test_throws ArgumentError groupby(df, [:b, order(:a)])
+    @test_throws ArgumentError groupby(df, [:b, order(:a)], sort=false)
+    @test_throws MethodError groupby(df, :a, sort=(x=1,))
 end
 
 end # module

From 4bee9dc615630da1d76c8ddc0b088a7fb0cf3978 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Mon, 19 Dec 2022 14:34:49 +0100
Subject: [PATCH 03/14] fix typo in groupby docstring

---
 src/groupeddataframe/groupeddataframe.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index 80a7eeffbd..980f3bd093 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -71,7 +71,8 @@ into row groups.
   grouping columns `cols`
 
 # Details
-, NamedTupleAn iterator over a `GroupedDataFrame` returns a `SubDataFrame` view
+
+An iterator over a `GroupedDataFrame` returns a `SubDataFrame` view
 for each grouping into `df`.
 Within each group, the order of rows in `df` is preserved.
 

From dfe9460a34c80a94be98b81d0533635db2794998 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Mon, 19 Dec 2022 15:31:20 +0100
Subject: [PATCH 04/14] fix sort value passed to row_group_slots

---
 src/groupeddataframe/groupeddataframe.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index 980f3bd093..01752ee25f 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -228,7 +228,7 @@ function groupby(df::AbstractDataFrame, cols;
     groups = Vector{Int}(undef, nrow(df))
     ngroups, rhashes, gslots, sorted =
         row_group_slots(ntuple(i -> sdf[!, i], ncol(sdf)), Val(false),
-                        groups, skipmissing, sort === true)
+                        groups, skipmissing, sort isa NamedTuple ? true : sort)
 
     gd = GroupedDataFrame(df, copy(_names(sdf)), groups, nothing, nothing, nothing, ngroups, nothing,
                           Threads.ReentrantLock())

From 669e439ad7b7bcf0ebb2228b4d634e4c8721fb19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Mon, 19 Dec 2022 15:44:44 +0100
Subject: [PATCH 05/14] make a correct choice when sort is a NamedTuple

---
 src/groupeddataframe/groupeddataframe.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index 01752ee25f..70a10cd3a8 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -228,7 +228,7 @@ function groupby(df::AbstractDataFrame, cols;
     groups = Vector{Int}(undef, nrow(df))
     ngroups, rhashes, gslots, sorted =
         row_group_slots(ntuple(i -> sdf[!, i], ncol(sdf)), Val(false),
-                        groups, skipmissing, sort isa NamedTuple ? true : sort)
+                        groups, skipmissing, sort isa NamedTuple ? nothing : sort)
 
     gd = GroupedDataFrame(df, copy(_names(sdf)), groups, nothing, nothing, nothing, ngroups, nothing,
                           Threads.ReentrantLock())

From 35e7f045b79b8d454abb27b11425a74f20d58abf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Mon, 19 Dec 2022 17:23:15 +0100
Subject: [PATCH 06/14] minor fixes

---
 docs/src/man/split_apply_combine.md      | 13 ++++++++-----
 src/groupeddataframe/groupeddataframe.jl |  1 -
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md
index 31aa4e468e..2dbbca77bd 100644
--- a/docs/src/man/split_apply_combine.md
+++ b/docs/src/man/split_apply_combine.md
@@ -1284,10 +1284,10 @@ the source data frame of a grouping key then pass `sort=false` keyword argument
 to `groupby`:
 
 ```jldoctest sac
-julia> df
-6×3 DataFrame
+julia> push!(df, ["a", 100, 100]) # push row with large integer values to disable default sorting
+7×3 DataFrame
  Row │ customer_id  transaction_id  volume 
-     │ String       Int64           Int64
+     │ String       Int64           Int64  
 ─────┼─────────────────────────────────────
    1 │ a                        12       2
    2 │ b                        15       3
@@ -1295,28 +1295,31 @@ julia> df
    4 │ b                        17       4
    5 │ c                        13       5
    6 │ c                        11       9
+   7 │ a                       100     100
 
 julia> keys(groupby(df, :volume))
-6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
+7-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
  GroupKey: (volume = 2,)
  GroupKey: (volume = 3,)
  GroupKey: (volume = 1,)
  GroupKey: (volume = 4,)
  GroupKey: (volume = 5,)
  GroupKey: (volume = 9,)
+ GroupKey: (volume = 100,)
 ```
 
 If you want to have them sorted in ascending order pass `sort=true`:
 
 ```
 julia> keys(groupby(df, :volume, sort=true))
-6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
+7-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
  GroupKey: (volume = 1,)
  GroupKey: (volume = 2,)
  GroupKey: (volume = 3,)
  GroupKey: (volume = 4,)
  GroupKey: (volume = 5,)
  GroupKey: (volume = 9,)
+ GroupKey: (volume = 100,)
 ```
 
 You can also use [`order`](@ref) wrapper when passing a column name to group by
diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index 70a10cd3a8..d04aedd8e8 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -268,7 +268,6 @@ function normalize_grouping_cols(cols::AbstractVector, sort::Bool)
     else
         return cols
     end
-    return cols.col
 end
 
 function genkeymap(gd, cols)

From 3b6ad89bd7d8fbdc7eff59ea2ea677cc85251b26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sun, 25 Dec 2022 09:46:16 +0100
Subject: [PATCH 07/14] Apply suggestions from code review

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 docs/src/man/split_apply_combine.md      | 4 ++--
 src/groupeddataframe/groupeddataframe.jl | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md
index 2dbbca77bd..c438b5792f 100644
--- a/docs/src/man/split_apply_combine.md
+++ b/docs/src/man/split_apply_combine.md
@@ -1280,7 +1280,7 @@ two aspects:
 
 By default order of groups produced by `groupby` is undefined.
 If you want the order of groups to follow the order of first appereance in
-the source data frame of a grouping key then pass `sort=false` keyword argument
+the source data frame of a grouping key then pass the `sort=false` keyword argument
 to `groupby`:
 
 ```jldoctest sac
@@ -1322,7 +1322,7 @@ julia> keys(groupby(df, :volume, sort=true))
  GroupKey: (volume = 100,)
 ```
 
-You can also use [`order`](@ref) wrapper when passing a column name to group by
+You can also use the [`order`](@ref) wrapper when passing a column name to group by
 or pass a named tuple containing one or more of `alg`, `lt`, `by`, `rev`, and
 `order` fields that will be treated just like in [`sortperm`](@ref):
 
diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index d04aedd8e8..08db5eab25 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -80,7 +80,7 @@ Within each group, the order of rows in `df` is preserved.
 In particular if it is an empty vector then a single-group `GroupedDataFrame`
 is created. As a special case, if a list of columns to group by is passed
 as a vector it can contain columns wrapped in [`order`](@ref) that will be
-used to determine order of groups if `sort` is `true` or a `NamedTuple` (if
+used to determine the order of groups if `sort` is `true` or a `NamedTuple` (if
 `sort` is `nothing` or `false`, then passing `order` is an error).
 
 A `GroupedDataFrame` also supports indexing by groups, `select`, `transform`,

From aee91ec573f7186201473d1bf70818d794965803 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sun, 25 Dec 2022 10:00:59 +0100
Subject: [PATCH 08/14] changes after code review

---
 src/groupeddataframe/groupeddataframe.jl | 31 ++++++++++++------------
 test/grouping.jl                         |  6 +++++
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index d04aedd8e8..d6daf63458 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -58,15 +58,21 @@ into row groups.
 # Arguments
 - `df` : an `AbstractDataFrame` to split
 - `cols` : data frame columns to group by. Can be any column selector
-  ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR).
-- `sort` : if `sort=true` sort groups according to the values of the grouping columns
-  `cols`; if `sort=false` groups are created in their order of appearance in `df`
-  if `sort=nothing` (the default) then the fastest available grouping algorithm
-  is picked and in consequence the order of groups in the result is undefined
-  and may change in future releases; below a description of the current
-  implementation is provided. Additionally `sort` can be a `NamedTuple` having
-  some or all of `alg`, `lt`, `by`, `rev`, and `order` fields. In this case
-  the groups are sorted and their order follows the [`sortperm`](@ref) order.
+  ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR). In particular if the selector
+  picks no columns then a single-group `GroupedDataFrame` is created. As a
+  special case, if a list of columns to group by is passed as a vector it can
+  contain columns wrapped in [`order`](@ref) that will be used to determine
+  order of groups if `sort` is `true` or a `NamedTuple` (if `sort` is `nothing`
+  or `false`, then passing `order` is an error).
+- `sort` : if `sort=true` sort groups according to the values of the grouping
+  columns `cols`; if `sort=false` groups are created in their order of
+  appearance in `df` if `sort=nothing` (the default) then the fastest available
+  grouping algorithm is picked and in consequence the order of groups in the
+  result is undefined and may change in future releases; below a description of
+  the current implementation is provided. Additionally `sort` can be a
+  `NamedTuple` having some or all of `alg`, `lt`, `by`, `rev`, and `order`
+  fields. In this case the groups are sorted and their order follows the
+  [`sortperm`](@ref) order.
 - `skipmissing` : whether to skip groups with `missing` values in one of the
   grouping columns `cols`
 
@@ -76,13 +82,6 @@ An iterator over a `GroupedDataFrame` returns a `SubDataFrame` view
 for each grouping into `df`.
 Within each group, the order of rows in `df` is preserved.
 
-`cols` can be any valid data frame indexing expression.
-In particular if it is an empty vector then a single-group `GroupedDataFrame`
-is created. As a special case, if a list of columns to group by is passed
-as a vector it can contain columns wrapped in [`order`](@ref) that will be
-used to determine order of groups if `sort` is `true` or a `NamedTuple` (if
-`sort` is `nothing` or `false`, then passing `order` is an error).
-
 A `GroupedDataFrame` also supports indexing by groups, `select`, `transform`,
 and `combine` (which applies a function to each group and combines the result
 into a data frame).
diff --git a/test/grouping.jl b/test/grouping.jl
index 3549d1bcc6..3a1b91e1e1 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -4394,6 +4394,9 @@ end
         gdf = groupby(df, order(col, by=abs), sort=(rev=true,))
         @test issorted(DataFrame(gdf)[:, col], rev=true, by=abs)
         @test all(x -> issorted(x.c), gdf)
+        gdf = groupby(df, col, sort=false)
+        @test getindex.(keys(gdf), 1) == unique(df[!, col])
+        @test all(x -> issorted(x.c), gdf)
     end
 
     gdf = groupby(df, [:a, :b], sort=true)
@@ -4411,6 +4414,9 @@ end
     gdf = groupby(df, [:a, order(:b, rev=false)], sort=(rev=true,))
     @test issorted(DataFrame(gdf), [:a, order(:b, rev=false)], rev=true)
     @test all(x -> issorted(x.c), gdf)
+    gdf = groupby(df, [:a, :b], sort=false)
+    @test Tuple.(keys(gdf)) == unique(Tuple.(eachrow(df[!, [:a, :b]])))
+    @test all(x -> issorted(x.c), gdf)
 
     @test_throws ArgumentError groupby(df, order(:a))
     @test_throws ArgumentError groupby(df, order(:a), sort=false)

From c8759e7e3ba3192210201121827164e861c2b28c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sun, 25 Dec 2022 12:26:22 +0100
Subject: [PATCH 09/14] Update src/groupeddataframe/groupeddataframe.jl

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 src/groupeddataframe/groupeddataframe.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index 34522de857..b3a6b3c218 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -66,7 +66,7 @@ into row groups.
   `nothing` or `false`, then passing `order` is an error).
 - `sort` : if `sort=true` sort groups according to the values of the grouping
   columns `cols`; if `sort=false` groups are created in their order of
-  appearance in `df` if `sort=nothing` (the default) then the fastest available
+  appearance in `df`; if `sort=nothing` (the default) then the fastest available
   grouping algorithm is picked and in consequence the order of groups in the
   result is undefined and may change in future releases; below a description of
   the current implementation is provided. Additionally `sort` can be a

From ca766d92555264cd61c37b9270b77326564ecc9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sun, 25 Dec 2022 13:48:51 +0100
Subject: [PATCH 10/14] automatically set sort=NamedTuple() when order is
 passed

---
 src/groupeddataframe/groupeddataframe.jl |  50 ++++----
 test/grouping.jl                         | 155 +++++++++++++++++------
 2 files changed, 144 insertions(+), 61 deletions(-)

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index b3a6b3c218..38fc61a295 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -63,7 +63,8 @@ into row groups.
   special case, if a list of columns to group by is passed as a vector it can
   contain columns wrapped in [`order`](@ref) that will be used to determine
   the order of groups if `sort` is `true` or a `NamedTuple` (if `sort` is
-  `nothing` or `false`, then passing `order` is an error).
+  `false`, then passing `order` is an error; if `sort` is `nothing`
+  then it is set to `true` when `order` is passed).
 - `sort` : if `sort=true` sort groups according to the values of the grouping
   columns `cols`; if `sort=false` groups are created in their order of
   appearance in `df`; if `sort=nothing` (the default) then the fastest available
@@ -109,7 +110,8 @@ and none of them is equal to `-0.0`.
 
 # See also
 
-[`combine`](@ref), [`select`](@ref), [`select!`](@ref), [`transform`](@ref), [`transform!`](@ref)
+[`combine`](@ref), [`select`](@ref), [`select!`](@ref), [`transform`](@ref),
+[`transform!`](@ref)
 
 # Examples
 ```jldoctest
@@ -216,7 +218,26 @@ julia> for g in gd
 function groupby(df::AbstractDataFrame, cols;
                  sort::Union{Bool,Nothing,NamedTuple}=nothing, skipmissing::Bool=false)
     _check_consistency(df)
-    idxcols = index(df)[normalize_grouping_cols(cols, sort === true || sort isa NamedTuple)]
+    if cols isa UserColOrdering ||
+       (cols isa AbstractVector && any(x -> x isa UserColOrdering, cols))
+        if isnothing(sort) || sort === true
+            # if sort === true replace it with NamedTuple to avoid sorting
+            # in row_group_slots as we will perform sorting later
+            sort = NamedTuple()
+        elseif sort === false
+            throw(ArgumentError("passing `order` is only allowed if `sort` " *
+                                "is `true`, `nothing`, or a `NamedTuple`"))
+        end
+        gcols = if cols isa UserColOrdering
+                    cols.col
+                else
+                    Any[x isa UserColOrdering ? x.col : x for x in cols]
+                end
+    else
+        gcols = cols
+    end
+
+    idxcols = index(df)[gcols]
     if isempty(idxcols)
         return GroupedDataFrame(df, Symbol[], ones(Int, nrow(df)),
                                 nothing, nothing, nothing, nrow(df) == 0 ? 0 : 1,
@@ -229,8 +250,8 @@ function groupby(df::AbstractDataFrame, cols;
         row_group_slots(ntuple(i -> sdf[!, i], ncol(sdf)), Val(false),
                         groups, skipmissing, sort isa NamedTuple ? nothing : sort)
 
-    gd = GroupedDataFrame(df, copy(_names(sdf)), groups, nothing, nothing, nothing, ngroups, nothing,
-                          Threads.ReentrantLock())
+    gd = GroupedDataFrame(df, copy(_names(sdf)), groups, nothing, nothing, nothing,
+                          ngroups, nothing, Threads.ReentrantLock())
 
     # sort groups if row_group_slots hasn't already done that
     if (sort === true && !sorted) || (sort isa NamedTuple)
@@ -250,25 +271,6 @@ function groupby(df::AbstractDataFrame, cols;
     return gd
 end
 
-normalize_grouping_cols(cols, sort::Bool) = cols
-
-function normalize_grouping_cols(cols::UserColOrdering, sort::Bool)
-    sort || throw(ArgumentError("passing `order` is only allowed if `sort` " *
-                                "is `true` or a `NamedTuple`"))
-    return cols.col
-end
-
-function normalize_grouping_cols(cols::AbstractVector, sort::Bool)
-    has_order = any(x -> x isa UserColOrdering, cols)
-    if has_order
-        sort || throw(ArgumentError("passing `order` is only allowed if `sort` " *
-                                    "is `true` or a `NamedTuple`"))
-        return Any[x isa UserColOrdering ? x.col : x for x in cols]
-    else
-        return cols
-    end
-end
-
 function genkeymap(gd, cols)
     # currently we use Dict{Any, Int} because then field :keymap in GroupedDataFrame
     # has a concrete type which makes the access to it faster as we do not have a dynamic
diff --git a/test/grouping.jl b/test/grouping.jl
index 3a1b91e1e1..c94666ca74 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -4379,50 +4379,131 @@ end
 end
 
 @testset "sorting API" begin
+    # simple tests
+    df = DataFrame(x=["b", "c", "b", "a", "c"])
+    @test getindex.(keys(groupby(df, :x)), 1) == ["b", "c", "a"]
+    @test getindex.(keys(groupby(df, :x, sort=true)), 1) == ["a", "b", "c"]
+    @test getindex.(keys(groupby(df, :x, sort=NamedTuple())), 1) == ["a", "b", "c"]
+    @test getindex.(keys(groupby(df, :x, sort=false)), 1) == ["b", "c", "a"]
+    @test getindex.(keys(groupby(df, order(:x))), 1) == ["a", "b", "c"]
+    @test getindex.(keys(groupby(df, order(:x), sort=true)), 1) == ["a", "b", "c"]
+    @test_throws ArgumentError groupby(df, order(:x), sort=false)
+    @test getindex.(keys(groupby(df, order(:x), sort=NamedTuple())), 1) == ["a", "b", "c"]
+    @test getindex.(keys(groupby(df, [order(:x)])), 1) == ["a", "b", "c"]
+    @test getindex.(keys(groupby(df, [order(:x)], sort=true)), 1) == ["a", "b", "c"]
+    @test_throws ArgumentError groupby(df, [order(:x)], sort=false)
+    @test getindex.(keys(groupby(df, [order(:x)], sort=NamedTuple())), 1) == ["a", "b", "c"]
+    @test getindex.(keys(groupby(df, order(:x, rev=true))), 1) == ["c", "b", "a"]
+    @test getindex.(keys(groupby(df, order(:x, rev=true), sort=true)), 1) == ["c", "b", "a"]
+    @test getindex.(keys(groupby(df, order(:x, rev=true), sort=NamedTuple())), 1) == ["c", "b", "a"]
+    @test getindex.(keys(groupby(df, [order(:x, rev=true)])), 1) == ["c", "b", "a"]
+    @test getindex.(keys(groupby(df, [order(:x, rev=true)], sort=true)), 1) == ["c", "b", "a"]
+    @test getindex.(keys(groupby(df, [order(:x, rev=true)], sort=NamedTuple())), 1) == ["c", "b", "a"]
+    @test getindex.(keys(groupby(df, :x, sort=(;rev=true))), 1) == ["c", "b", "a"]
+    @test getindex.(keys(groupby(df, [:x], sort=(;rev=true))), 1) == ["c", "b", "a"]
+   
+    # by default sorting is not applied as range of values is wide
+    df = DataFrame(x=[2, 100, 2, 1, 100])
+    @test getindex.(keys(groupby(df, :x)), 1) == [2, 100, 1]
+    @test getindex.(keys(groupby(df, :x, sort=true)), 1) == [1, 2, 100]
+    @test getindex.(keys(groupby(df, :x, sort=NamedTuple())), 1) == [1, 2, 100]
+    @test getindex.(keys(groupby(df, :x, sort=false)), 1) == [2, 100, 1]
+    @test getindex.(keys(groupby(df, order(:x))), 1) == [1, 2, 100]
+    @test getindex.(keys(groupby(df, [order(:x)])), 1) == [1, 2, 100]
+    @test getindex.(keys(groupby(df, order(:x, rev=true))), 1) == [100, 2, 1]
+    @test getindex.(keys(groupby(df, [order(:x, rev=true)])), 1) == [100, 2, 1]
+    @test getindex.(keys(groupby(df, :x, sort=(;rev=true))), 1) == [100, 2, 1]
+    @test getindex.(keys(groupby(df, [:x], sort=(;rev=true))), 1) == [100, 2, 1]
+
+    # by default sorting is applied as range of values is wide
+    df = DataFrame(x=[2, 3, 2, 1, 3])
+    @test getindex.(keys(groupby(df, :x)), 1) == [1, 2, 3]
+    @test getindex.(keys(groupby(df, :x, sort=true)), 1) == [1, 2, 3]
+    @test getindex.(keys(groupby(df, :x, sort=NamedTuple())), 1) == [1, 2, 3]
+    @test getindex.(keys(groupby(df, :x, sort=false)), 1) == [2, 3, 1]
+    @test getindex.(keys(groupby(df, order(:x))), 1) == [1, 2, 3]
+    @test getindex.(keys(groupby(df, [order(:x)])), 1) == [1, 2, 3]
+    @test getindex.(keys(groupby(df, order(:x, rev=true))), 1) == [3, 2, 1]
+    @test getindex.(keys(groupby(df, [order(:x, rev=true)])), 1) == [3, 2, 1]
+    @test getindex.(keys(groupby(df, :x, sort=(;rev=true))), 1) == [3, 2, 1]
+    @test getindex.(keys(groupby(df, [:x], sort=(;rev=true))), 1) == [3, 2, 1]
+
+    # randomized tests
     Random.seed!(1234)
-    df = DataFrame(a=rand(-10:10, 100), b=rand(-10:10, 100), c=1:100)
-    for col in (:a, "a", 1, :b, "b", 2, :c, "c", 3) 
-        gdf = groupby(df, col, sort=true)
-        @test issorted(DataFrame(gdf)[:, col])
+    df1 = DataFrame(a=rand(-10:10, 100), b=rand(-10:10, 100), c=1:100)
+    df2 = string.(df1, pad=3)
+
+    for df in (df1, df2)
+        for col in (:a, "a", 1, :b, "b", 2, :c, "c", 3) 
+            gdf = groupby(df, order(col))
+            @test issorted(DataFrame(gdf)[:, col])
+            @test all(x -> issorted(x.c), gdf)
+            gdf = groupby(df, col, sort=true)
+            @test issorted(DataFrame(gdf)[:, col])
+            @test all(x -> issorted(x.c), gdf)
+            gdf = groupby(df, order(col), sort=true)
+            @test issorted(DataFrame(gdf)[:, col])
+            @test all(x -> issorted(x.c), gdf)
+            gdf = groupby(df, col, sort=NamedTuple())
+            @test issorted(DataFrame(gdf)[:, col])
+            @test all(x -> issorted(x.c), gdf)
+            gdf = groupby(df, order(col), sort=NamedTuple())
+            @test issorted(DataFrame(gdf)[:, col])
+            @test all(x -> issorted(x.c), gdf)
+            gdf = groupby(df, col, sort=(rev=true,))
+            @test issorted(DataFrame(gdf)[:, col], rev=true)
+            @test all(x -> issorted(x.c), gdf)
+            if eltype(df[!, col]) === Int
+                gdf = groupby(df, order(col, by=abs), sort=(rev=true,))
+                @test issorted(DataFrame(gdf)[:, col], rev=true, by=abs)
+            else
+                gdf = groupby(df, order(col, by=abs∘(x -> parse(Int, x))), sort=(rev=true,))
+                @test issorted(DataFrame(gdf)[:, col], rev=true, by=abs∘(x -> parse(Int, x)))
+            end
+            @test all(x -> issorted(x.c), gdf)
+            gdf = groupby(df, col, sort=false)
+            @test getindex.(keys(gdf), 1) == unique(df[!, col])
+            @test all(x -> issorted(x.c), gdf)
+        end
+
+        gdf = groupby(df, [:a, :b], sort=true)
+        @test issorted(DataFrame(gdf), [:a, :b])
         @test all(x -> issorted(x.c), gdf)
-        gdf = groupby(df, col, sort=NamedTuple())
-        @test issorted(DataFrame(gdf)[:, col])
+        gdf = groupby(df, [:a, order(:b)])
+        @test issorted(DataFrame(gdf), [:a, :b])
         @test all(x -> issorted(x.c), gdf)
-        gdf = groupby(df, col, sort=(rev=true,))
-        @test issorted(DataFrame(gdf)[:, col], rev=true)
+        gdf = groupby(df, [:a, order(:b)], sort=true)
+        @test issorted(DataFrame(gdf), [:a, :b])
         @test all(x -> issorted(x.c), gdf)
-        gdf = groupby(df, order(col, by=abs), sort=(rev=true,))
-        @test issorted(DataFrame(gdf)[:, col], rev=true, by=abs)
+        gdf = groupby(df, [:a, :b], sort=NamedTuple())
+        @test issorted(DataFrame(gdf), [:a, :b])
         @test all(x -> issorted(x.c), gdf)
-        gdf = groupby(df, col, sort=false)
-        @test getindex.(keys(gdf), 1) == unique(df[!, col])
+        gdf = groupby(df, [:a, order(:b)], sort=NamedTuple())
+        @test issorted(DataFrame(gdf), [:a, :b])
+        @test all(x -> issorted(x.c), gdf)
+        gdf = groupby(df, [:a, :b], sort=(rev=true,))
+        @test issorted(DataFrame(gdf), [:a, :b], rev=true)
+        @test all(x -> issorted(x.c), gdf)
+        if eltype(df[!, col]) === Int
+            gdf = groupby(df, [order(:a, by=abs), :b], sort=(rev=true,))
+            @test issorted(DataFrame(gdf), [order(:a, by=abs), :b], rev=true)
+            @test all(x -> issorted(x.c), gdf)
+        else
+            gdf = groupby(df, [order(:a, by=abs∘(x -> parse(Int, x))), :b], sort=(rev=true,))
+            @test issorted(DataFrame(gdf), [order(:a, by=abs∘(x -> parse(Int, x))), :b], rev=true)
+            @test all(x -> issorted(x.c), gdf)
+        end
+        gdf = groupby(df, [:a, order(:b, rev=false)], sort=(rev=true,))
+        @test issorted(DataFrame(gdf), [:a, order(:b, rev=false)], rev=true)
+        @test all(x -> issorted(x.c), gdf)
+        gdf = groupby(df, [:a, :b], sort=false)
+        @test Tuple.(keys(gdf)) == unique(Tuple.(eachrow(df[!, [:a, :b]])))
         @test all(x -> issorted(x.c), gdf)
-    end
 
-    gdf = groupby(df, [:a, :b], sort=true)
-    @test issorted(DataFrame(gdf), [:a, :b])
-    @test all(x -> issorted(x.c), gdf)
-    gdf = groupby(df, [:a, :b], sort=NamedTuple())
-    @test issorted(DataFrame(gdf), [:a, :b])
-    @test all(x -> issorted(x.c), gdf)
-    gdf = groupby(df, [:a, :b], sort=(rev=true,))
-    @test issorted(DataFrame(gdf), [:a, :b], rev=true)
-    @test all(x -> issorted(x.c), gdf)
-    gdf = groupby(df, [order(:a, by=abs), :b], sort=(rev=true,))
-    @test issorted(DataFrame(gdf), [order(:a, by=abs), :b], rev=true)
-    @test all(x -> issorted(x.c), gdf)
-    gdf = groupby(df, [:a, order(:b, rev=false)], sort=(rev=true,))
-    @test issorted(DataFrame(gdf), [:a, order(:b, rev=false)], rev=true)
-    @test all(x -> issorted(x.c), gdf)
-    gdf = groupby(df, [:a, :b], sort=false)
-    @test Tuple.(keys(gdf)) == unique(Tuple.(eachrow(df[!, [:a, :b]])))
-    @test all(x -> issorted(x.c), gdf)
-
-    @test_throws ArgumentError groupby(df, order(:a))
-    @test_throws ArgumentError groupby(df, order(:a), sort=false)
-    @test_throws ArgumentError groupby(df, [:b, order(:a)])
-    @test_throws ArgumentError groupby(df, [:b, order(:a)], sort=false)
-    @test_throws MethodError groupby(df, :a, sort=(x=1,))
+        @test_throws ArgumentError groupby(df, order(:a), sort=false)
+        @test_throws ArgumentError groupby(df, [:b, order(:a)], sort=false)
+        @test_throws MethodError groupby(df, :a, sort=(x=1,))
+    end
 end
 
 end # module

From 7bfaea0fda796d0e4ab118f3d2dac37cee6435d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sun, 25 Dec 2022 13:51:55 +0100
Subject: [PATCH 11/14] update manual

---
 docs/src/man/split_apply_combine.md | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md
index c438b5792f..03de3c8763 100644
--- a/docs/src/man/split_apply_combine.md
+++ b/docs/src/man/split_apply_combine.md
@@ -1322,18 +1322,13 @@ julia> keys(groupby(df, :volume, sort=true))
  GroupKey: (volume = 100,)
 ```
 
-You can also use the [`order`](@ref) wrapper when passing a column name to group by
-or pass a named tuple containing one or more of `alg`, `lt`, `by`, `rev`, and
-`order` fields that will be treated just like in [`sortperm`](@ref):
+You can also use the [`order`](@ref) wrapper when passing a column name to group
+by, or pass a named tuple as the `sort` keyword argument containing one or more
+of `alg`, `lt`, `by`, `rev`, and `order` fields that will be treated just like in
+[`sortperm`](@ref):
 
 ```
-julia> keys(groupby(df, :customer_id, sort=(rev=true,)))
-3-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
- GroupKey: (customer_id = "c",)
- GroupKey: (customer_id = "b",)
- GroupKey: (customer_id = "a",)
-
-julia> keys(groupby(df, [:customer_id, order(:volume, rev=true)], sort=true))
+julia> keys(groupby(df, [:customer_id, order(:volume, rev=true)]))
 6-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
  GroupKey: (customer_id = "a", volume = 2)
  GroupKey: (customer_id = "b", volume = 4)
@@ -1342,4 +1337,10 @@ julia> keys(groupby(df, [:customer_id, order(:volume, rev=true)], sort=true))
  GroupKey: (customer_id = "c", volume = 9)
  GroupKey: (customer_id = "c", volume = 5)
 
+julia> keys(groupby(df, :customer_id, sort=(rev=true,)))
+3-element DataFrames.GroupKeys{GroupedDataFrame{DataFrame}}:
+ GroupKey: (customer_id = "c",)
+ GroupKey: (customer_id = "b",)
+ GroupKey: (customer_id = "a",)
 ```
+

From 4948ae84ffcd95c246ff8194ea8b028c4df19c89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sun, 25 Dec 2022 16:46:19 +0100
Subject: [PATCH 12/14] fix test

---
 test/grouping.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/grouping.jl b/test/grouping.jl
index c94666ca74..cb6371f66c 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -4484,7 +4484,7 @@ end
         gdf = groupby(df, [:a, :b], sort=(rev=true,))
         @test issorted(DataFrame(gdf), [:a, :b], rev=true)
         @test all(x -> issorted(x.c), gdf)
-        if eltype(df[!, col]) === Int
+        if eltype(df[!, :a]) === Int
             gdf = groupby(df, [order(:a, by=abs), :b], sort=(rev=true,))
             @test issorted(DataFrame(gdf), [order(:a, by=abs), :b], rev=true)
             @test all(x -> issorted(x.c), gdf)

From cc9b8e8530ba767c054e3aac25c439b664f668f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Tue, 27 Dec 2022 12:32:07 +0100
Subject: [PATCH 13/14] Update test/grouping.jl

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 test/grouping.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/grouping.jl b/test/grouping.jl
index cb6371f66c..ddbac2687c 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -4415,7 +4415,7 @@ end
     @test getindex.(keys(groupby(df, :x, sort=(;rev=true))), 1) == [100, 2, 1]
     @test getindex.(keys(groupby(df, [:x], sort=(;rev=true))), 1) == [100, 2, 1]
 
-    # by default sorting is applied as range of values is wide
+    # by default sorting is applied as range of values is narrow
     df = DataFrame(x=[2, 3, 2, 1, 3])
     @test getindex.(keys(groupby(df, :x)), 1) == [1, 2, 3]
     @test getindex.(keys(groupby(df, :x, sort=true)), 1) == [1, 2, 3]

From 7e72fc68c03473e6e139611c4402199138b4240a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Tue, 27 Dec 2022 14:19:35 +0100
Subject: [PATCH 14/14] improve docstring

---
 src/groupeddataframe/groupeddataframe.jl | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index 38fc61a295..f6d4bf9c69 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -60,10 +60,10 @@ into row groups.
 - `cols` : data frame columns to group by. Can be any column selector
   ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR). In particular if the selector
   picks no columns then a single-group `GroupedDataFrame` is created. As a
-  special case, if a list of columns to group by is passed as a vector it can
-  contain columns wrapped in [`order`](@ref) that will be used to determine
-  the order of groups if `sort` is `true` or a `NamedTuple` (if `sort` is
-  `false`, then passing `order` is an error; if `sort` is `nothing`
+  special case, if `cols` is a single column or a vector of columns then
+  the columns can be wrapped in [`order`](@ref), which will be used to
+  determine the order of groups if `sort` is `true` or a `NamedTuple` (if `sort`
+  is `false`, then passing `order` is an error; if `sort` is `nothing`
   then it is set to `true` when `order` is passed).
 - `sort` : if `sort=true` sort groups according to the values of the grouping
   columns `cols`; if `sort=false` groups are created in their order of
@@ -216,7 +216,8 @@ julia> for g in gd
 ```
 """
 function groupby(df::AbstractDataFrame, cols;
-                 sort::Union{Bool,Nothing,NamedTuple}=nothing, skipmissing::Bool=false)
+                 sort::Union{Bool, Nothing, NamedTuple}=nothing,
+                 skipmissing::Bool=false)
     _check_consistency(df)
     if cols isa UserColOrdering ||
        (cols isa AbstractVector && any(x -> x isa UserColOrdering, cols))