From 17da97173b62d13668be39d23ab92c8cdf9181af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Mon, 26 Sep 2022 23:19:07 +0200
Subject: [PATCH 1/5] rename valuestransform to valuesfunction in unstack

---
 NEWS.md                                |  2 +-
 docs/src/man/reshaping_and_pivoting.md |  4 +--
 src/abstractdataframe/reshape.jl       | 40 +++++++++++------------
 test/metadata.jl                       |  4 +--
 test/multithreading.jl                 |  6 ++--
 test/reshape.jl                        | 44 +++++++++++++-------------
 6 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 45317dc54d..4402e4597b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -18,7 +18,7 @@
   for a more flexible handling of values stored in a column that will
   become a new header
   ([#3004](https://github.com/JuliaData/DataFrames.jl/issues/3004))
-* `unstack` now allows passing a function in `valuestransform` keyword argument;
+* `unstack` now allows passing a function in `valuesfunction` keyword argument;
   this allows for a convenient creation of two dimensional pivot tables
   ([#2998](https://github.com/JuliaData/DataFrames.jl/issues/2998))
 * `filter` for `GroupedDataFrame` now accepts `ungroup` keyword argument
diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md
index 21d6138abc..e2974e400f 100755
--- a/docs/src/man/reshaping_and_pivoting.md
+++ b/docs/src/man/reshaping_and_pivoting.md
@@ -297,7 +297,7 @@ Id columns -- `RepeatedVector`
 This repeats the original columns N times where N is the number of columns stacked.
 
 To do aggregation, use the split-apply-combine functions in combination with
-`unstack` or use the `valuestransform` keyword argument in `unstack`. Here is an example:
+`unstack` or use the `valuesfunction` keyword argument in `unstack`. Here is an example:
 
 ```jldoctest reshape
 julia> using Statistics
@@ -357,7 +357,7 @@ julia> unstack(agg, :variable, :Species, :vmean)
    4 │ PetalWidth         0.244            1.326           2.026
    5 │ id                25.5             75.5           125.5
 
-julia> unstack(d, :variable, :Species, :value, valuestransform=mean)
+julia> unstack(d, :variable, :Species, :value, valuesfunction=mean)
 5×4 DataFrame
  Row │ variable     Iris-setosa  Iris-versicolor  Iris-virginica
      │ String       Float64?     Float64?         Float64?
diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index b974886cad..9226366549 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -215,15 +215,15 @@ end
 """
     unstack(df::AbstractDataFrame, rowkeys, colkey, value;
             renamecols::Function=identity, allowmissing::Bool=false,
-            allowduplicates::Bool=false, valuestransform=nothing,
+            allowduplicates::Bool=false, valuesfunction=nothing,
             fill=missing, threads::Bool=true)
     unstack(df::AbstractDataFrame, colkey, value;
             renamecols::Function=identity, allowmissing::Bool=false,
-            allowduplicates::Bool=false, valuestransform=nothing,
+            allowduplicates::Bool=false, valuesfunction=nothing,
             fill=missing, threads::Bool=true)
     unstack(df::AbstractDataFrame;
             renamecols::Function=identity, allowmissing::Bool=false,
-            allowduplicates::Bool=false, valuestransform=nothing,
+            allowduplicates::Bool=false, valuesfunction=nothing,
             fill=missing, threads::Bool=true)
 
 Unstack data frame `df`, i.e. convert it from long to wide format.
@@ -252,18 +252,18 @@ Row and column keys will be ordered in the order of their first appearance.
 - `allowduplicates`: if `false` (the default) then an error an error will be
   thrown if combination of `rowkeys` and `colkey` contains duplicate entries; if
   `true` then the last encountered `value` will be retained;
-  this keyword argument is ignored if `valuestransform` keyword argument is passed.
-- `valuestransform`: if passed then `allowduplicates` is ignored and instead
+  this keyword argument is ignored if `valuesfunction` keyword argument is passed.
+- `valuesfunction`: if passed then `allowduplicates` is ignored and instead
    the passed function will be called on a vector view containing all elements
    for each combination of `rowkeys` and `colkey` present in the data.
 - `fill`: missing row/column combinations are filled with this value. The
   default is `missing`. If the `value` column is a `CategoricalVector` and
   `fill` is not `missing` then in order to keep unstacked value columns also
   `CategoricalVector` the `fill` must be passed as `CategoricalValue`
-- `threads`: whether `valuestransform` may be run in separate tasks which
+- `threads`: whether `valuesfunction` may be run in separate tasks which
   can execute in parallel (possibly being applied to multiple groups at the same time).
   Whether or not tasks are actually spawned and their number are determined automatically.
-  Set to `false` if `valuestransform` requires serial execution or is not thread-safe.
+  Set to `false` if `valuesfunction` requires serial execution or is not thread-safe.
 
 Metadata: table-level `:note`-style metadata and column-level `:note`-style metadata
 for row keys columns are preserved.
@@ -401,14 +401,14 @@ julia> df = DataFrame(cols=["a", "a", "b"], values=[1, 2, 4])
    2 │ a            2
    3 │ b            4
 
-julia> unstack(df, :cols, :values, valuestransform=copy)
+julia> unstack(df, :cols, :values, valuesfunction=copy)
 1×2 DataFrame
  Row │ a        b
      │ Array…?  Array…?
 ─────┼──────────────────
    1 │ [1, 2]   [4]
 
-julia> unstack(df, :cols, :values, valuestransform=sum)
+julia> unstack(df, :cols, :values, valuesfunction=sum)
 1×2 DataFrame
  Row │ a       b
      │ Int64?  Int64?
@@ -419,7 +419,7 @@ julia> unstack(df, :cols, :values, valuestransform=sum)
 function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex,
                  values::ColumnIndex; renamecols::Function=identity,
                  allowmissing::Bool=false, allowduplicates::Bool=false,
-                 valuestransform=nothing, fill=missing,
+                 valuesfunction=nothing, fill=missing,
                  threads::Bool=true)
     # first make sure that rowkeys are unique and
     # normalize all selectors as a strings
@@ -428,7 +428,7 @@ function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex,
     colkey = only(names(df, colkey))
     values = only(names(df, values))
 
-    if !isnothing(valuestransform)
+    if !isnothing(valuesfunction)
         # potentially colkey can be also part of rowkeys so we need to do unique
         groupcols = unique!([rowkeys; colkey])
         @assert groupcols isa Vector{String}
@@ -441,16 +441,16 @@ function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex,
         end
 
         gdf = groupby(df, groupcols)
-        if check_aggregate(valuestransform, df[!, values]) isa AbstractAggregate
-            # if valuestransform function is AbstractAggregate
+        if check_aggregate(valuesfunction, df[!, values]) isa AbstractAggregate
+            # if valuesfunction function is AbstractAggregate
             # then we are sure it will return a scalar number so we can
             # leave it as is and be sure we use fast path in combine
-            agg_fun = valuestransform
+            agg_fun = valuesfunction
         else
-            # in general valuestransform function could return e.g. a vector,
+            # in general valuesfunction function could return e.g. a vector,
             # which would get expanded to multiple rows so we protect it with
             # Ref that will get unwrapped by combine
-            agg_fun = Ref∘valuestransform
+            agg_fun = Ref∘valuesfunction
         end
         df_op = combine(gdf, values => agg_fun => values_out,
                         threads=threads)
@@ -478,22 +478,22 @@ end
 function unstack(df::AbstractDataFrame, colkey::ColumnIndex, values::ColumnIndex;
                  renamecols::Function=identity,
                  allowmissing::Bool=false, allowduplicates::Bool=false,
-                 valuestransform=nothing, fill=missing,
+                 valuesfunction=nothing, fill=missing,
                  threads::Bool=true)
     colkey_int = index(df)[colkey]
     value_int = index(df)[values]
     return unstack(df, Not(colkey_int, value_int), colkey_int, value_int,
             renamecols=renamecols, allowmissing=allowmissing,
-            allowduplicates=allowduplicates, valuestransform=valuestransform,
+            allowduplicates=allowduplicates, valuesfunction=valuesfunction,
             fill=fill, threads=threads)
 end
 
 unstack(df::AbstractDataFrame; renamecols::Function=identity,
         allowmissing::Bool=false, allowduplicates::Bool=false,
-        valuestransform=nothing, fill=missing,
+        valuesfunction=nothing, fill=missing,
         threads::Bool=true) =
     unstack(df, :variable, :value, renamecols=renamecols, allowmissing=allowmissing,
-            allowduplicates=allowduplicates, valuestransform=valuestransform,
+            allowduplicates=allowduplicates, valuesfunction=valuesfunction,
             fill=fill, threads=threads)
 
 # we take into account the fact that idx, starts and ends are computed lazily
diff --git a/test/metadata.jl b/test/metadata.jl
index 40d6c06c7e..df02346169 100644
--- a/test/metadata.jl
+++ b/test/metadata.jl
@@ -1327,7 +1327,7 @@ end
     @test check_allnotemetadata(res)
     @test getfield(res, :metadata) === nothing
     @test getfield(res, :colmetadata) === nothing
-    res = unstack(long, :a, :variable, :value, valuestransform=copy)
+    res = unstack(long, :a, :variable, :value, valuesfunction=copy)
     @test check_allnotemetadata(res)
     @test getfield(res, :metadata) === nothing
     @test getfield(res, :colmetadata) === nothing
@@ -1361,7 +1361,7 @@ end
     @test isempty(colmetadatakeys(res, :c))
     @test isempty(colmetadatakeys(res, :d))
 
-    res = unstack(long, :a, :variable, :value, valuestransform=copy)
+    res = unstack(long, :a, :variable, :value, valuesfunction=copy)
     @test check_allnotemetadata(res)
     @test collect(metadatakeys(res)) == ["name"]
     @test metadata(res, "name") == "empty"
diff --git a/test/multithreading.jl b/test/multithreading.jl
index c81ee0ac1f..65ab239820 100644
--- a/test/multithreading.jl
+++ b/test/multithreading.jl
@@ -237,15 +237,15 @@ end
     m = Ref(0)
     n = Ref(0)
     unstack(df,
-            allowduplicates=true, valuestransform=x -> (l[] += 1),
+            allowduplicates=true, valuesfunction=x -> (l[] += 1),
             threads=false) ==
             DataFrame(id=1:3, a=[1, 3, 5], b=[2, 4, 6]) ==
     unstack(df, :variable, :value,
-            allowduplicates=true, valuestransform=x -> (m[] += 1),
+            allowduplicates=true, valuesfunction=x -> (m[] += 1),
             threads=false) ==
             DataFrame(id=1:3, a=[1, 3, 5], b=[2, 4, 6]) ==
     unstack(df, :id, :variable, :value,
-            allowduplicates=true, valuestransform=x -> (n[] += 1),
+            allowduplicates=true, valuesfunction=x -> (n[] += 1),
             threads=false) ==
             DataFrame(id=1:3, a=[1, 3, 5], b=[2, 4, 6])
 
diff --git a/test/reshape.jl b/test/reshape.jl
index 6c80544ac3..f2676b028a 100644
--- a/test/reshape.jl
+++ b/test/reshape.jl
@@ -769,17 +769,17 @@ end
     df = DataFrame(x=[:one, :two, :one], y=[1, 2, 3])
     @test_throws ArgumentError unstack(df, :x, :y)
     @test unstack(df, :x, :y, allowduplicates=true) == DataFrame(one=3, two=2)
-    @test unstack(df, :x, :y, valuestransform=identity) ==
+    @test unstack(df, :x, :y, valuesfunction=identity) ==
           DataFrame(one=[[1, 3]], two=[[2]])
-    @test unstack(df, :x, :y, valuestransform=last) ==
+    @test unstack(df, :x, :y, valuesfunction=last) ==
           DataFrame(one=3, two=2)
-    @test unstack(df, :x, :y, valuestransform=first) ==
+    @test unstack(df, :x, :y, valuesfunction=first) ==
           DataFrame(one=1, two=2)
-    @test unstack(df, :x, :y, valuestransform=length) ==
+    @test unstack(df, :x, :y, valuesfunction=length) ==
           DataFrame(one=2, two=1)
 end
 
-@testset "valuestransform" begin
+@testset "valuesfunction" begin
     df = DataFrame(rowid=[1, 1, 1, 1, 2, 2], colid=[1, 1, 2, 2, 3, 3], values=1:6)
     @test_throws ArgumentError unstack(df, :rowid, :colid, :values)
     @test unstack(df, :rowid, :colid, :values, allowduplicates=true) ≅
@@ -788,42 +788,42 @@ end
     @test unstack(df, :rowid, :colid, :values, allowduplicates=true, fill=0) ==
           DataFrame("rowid" => 1:2, "1" => [2, 0],
                     "2" => [4, 0], "3" => [0, 6])
-    @test unstack(df, :rowid, :colid, :values, valuestransform=identity) ≅
+    @test unstack(df, :rowid, :colid, :values, valuesfunction=identity) ≅
           DataFrame("rowid" => 1:2, "1" => [1:2, missing],
                     "2" => [3:4, missing], "3" => [missing, 5:6])
     @test unstack(df, :rowid, :colid, :values,
-                  valuestransform=identity, fill=Int[]) ==
+                  valuesfunction=identity, fill=Int[]) ==
           DataFrame("rowid" => 1:2, "1" => [1:2, []],
                     "2" => [3:4, []], "3" => [[], 5:6])
-    @test unstack(df, :rowid, :colid, :values, valuestransform=sum) ≅
+    @test unstack(df, :rowid, :colid, :values, valuesfunction=sum) ≅
           DataFrame("rowid" => 1:2, "1" => [3, missing],
                     "2" => [7, missing], "3" => [missing, 11])
-    @test unstack(df, :rowid, :colid, :values, valuestransform=sum, fill=0) ==
+    @test unstack(df, :rowid, :colid, :values, valuesfunction=sum, fill=0) ==
           DataFrame("rowid" => 1:2, "1" => [3, 0],
                     "2" => [7, 0], "3" => [0, 11])
-    @test unstack(df, :rowid, :colid, :values, valuestransform=sum, fill="X") ==
+    @test unstack(df, :rowid, :colid, :values, valuesfunction=sum, fill="X") ==
           DataFrame("rowid" => 1:2, "1" => [3, "X"],
                     "2" => [7, "X"], "3" => ["X", 11])
-    @test unstack(df, :rowid, :colid, :values, valuestransform=length) ≅
+    @test unstack(df, :rowid, :colid, :values, valuesfunction=length) ≅
           DataFrame("rowid" => 1:2, "1" => [2, missing],
                     "2" => [2, missing], "3" => [missing, 2])
-    @test unstack(df, :rowid, :colid, :values, valuestransform=length, fill=0) ==
+    @test unstack(df, :rowid, :colid, :values, valuesfunction=length, fill=0) ==
           DataFrame("rowid" => 1:2, "1" => [2, 0],
                     "2" => [2, 0], "3" => [0, 2])
     @test unstack(df, :rowid, :colid, :values,
-                  valuestransform=x -> isempty(x) ? missing : length(x)) ≅
+                  valuesfunction=x -> isempty(x) ? missing : length(x)) ≅
           DataFrame("rowid" => 1:2, "1" => [2, missing],
                     "2" => [2, missing], "3" => [missing, 2])
     @test unstack(df, :rowid, :colid, :values,
-                  valuestransform=x -> isempty(x) ? missing : x) ≅
+                  valuesfunction=x -> isempty(x) ? missing : x) ≅
           DataFrame("rowid" => 1:2, "1" => [1:2, missing],
                     "2" => [3:4, missing], "3" => [missing, 5:6])
 
     df = DataFrame(rowid=[2, 2, 2, 2, 1, 1], colid=[2, 2, 1, 1, 3, 3], values=1:6)
-    @test unstack(df, :rowid, :colid, :values, valuestransform=identity) ≅
+    @test unstack(df, :rowid, :colid, :values, valuesfunction=identity) ≅
           DataFrame("rowid" => [2,1], "2" => [1:2, missing],
                     "1" => [3:4, missing], "3" => [missing, 5:6])
-    @test unstack(df, :rowid, :colid, :values, valuestransform=identity, fill="X") ==
+    @test unstack(df, :rowid, :colid, :values, valuesfunction=identity, fill="X") ==
           DataFrame("rowid" => [2,1], "2" => [1:2, "X"],
                     "1" => [3:4, "X"], "3" => ["X", 5:6])
 
@@ -831,22 +831,22 @@ end
     # check correctness of row and column ordering
     for _ in 1:10
         df = DataFrame(rowid=rand(1:10, 50), colid=rand(1:10, 50), values=1:50)
-        res = unstack(df, :rowid, :colid, :values, valuestransform=last)
+        res = unstack(df, :rowid, :colid, :values, valuesfunction=last)
         @test res ≅ unstack(df, :rowid, :colid, :values, allowduplicates=true)
         @test res.rowid == unique(df.rowid)
         @test names(res, Not(1)) == string.(unique(df.colid))
-        res = unstack(df, :rowid, :colid, :values, valuestransform=last, fill=0)
+        res = unstack(df, :rowid, :colid, :values, valuesfunction=last, fill=0)
         @test res ≅ unstack(df, :rowid, :colid, :values, allowduplicates=true, fill=0)
         @test res.rowid == unique(df.rowid)
         @test names(res, Not(1)) == string.(unique(df.colid))
 
         df.rowid=categorical(df.rowid, levels=shuffle(unique(df.rowid)))
         df.colid=categorical(df.colid, levels=shuffle(unique(df.colid)))
-        res = unstack(df, :rowid, :colid, :values, valuestransform=last)
+        res = unstack(df, :rowid, :colid, :values, valuesfunction=last)
         @test res ≅ unstack(df, :rowid, :colid, :values, allowduplicates=true)
         @test unwrap.(res.rowid) == unique(df.rowid)
         @test names(res, Not(1)) == string.(unique(df.colid))
-        res = unstack(df, :rowid, :colid, :values, valuestransform=last, fill=0)
+        res = unstack(df, :rowid, :colid, :values, valuesfunction=last, fill=0)
         @test res ≅
             unstack(df, :rowid, :colid, :values, allowduplicates=true, fill=0)
         @test unwrap.(res.rowid) == unique(df.rowid)
@@ -893,7 +893,7 @@ end
         for c in (:a, :b, :c, "a", "b", "c", 1, 2, 3)
             for v in (:a, :b, :c, "a", "b", "c", 1, 2, 3)
                 @test unstack(df, r, c, v) ≅
-                      broadcast(x -> x isa Vector ? only(x) : x, unstack(df, r, c, v, valuestransform=copy))
+                      broadcast(x -> x isa Vector ? only(x) : x, unstack(df, r, c, v, valuesfunction=copy))
             end
         end
     end
@@ -906,7 +906,7 @@ end
                     d=["a", missing, missing],
                     e=[missing, "b", missing],
                     f=[missing, missing, "c"])
-    @test unstack(df, 3, 2, 1, valuestransform=only) ≅
+    @test unstack(df, 3, 2, 1, valuesfunction=only) ≅
           DataFrame(values_out_3490283_11=["g", "h", "i"],
                     d=["a", missing, missing],
                     e=[missing, "b", missing],

From 8db0c0eb0314f5b9b6534a10b857d13fd5077ef6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sat, 1 Oct 2022 12:15:24 +0200
Subject: [PATCH 2/5] change kwarg to combine and deprecate allowduplicates

---
 NEWS.md                                |  11 ++-
 docs/src/man/reshaping_and_pivoting.md |   4 +-
 src/abstractdataframe/reshape.jl       | 118 +++++++++++++------------
 test/deprecated.jl                     |  15 ++++
 test/metadata.jl                       |   4 +-
 test/multithreading.jl                 |   9 +-
 test/reshape.jl                        |  70 +++++++--------
 7 files changed, 128 insertions(+), 103 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 4402e4597b..16e7a44079 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -18,9 +18,10 @@
   for a more flexible handling of values stored in a column that will
   become a new header
   ([#3004](https://github.com/JuliaData/DataFrames.jl/issues/3004))
-* `unstack` now allows passing a function in `valuesfunction` keyword argument;
+* `unstack` now allows passing a function in `combine` keyword argument;
   this allows for a convenient creation of two dimensional pivot tables
-  ([#2998](https://github.com/JuliaData/DataFrames.jl/issues/2998))
+  ([#2998](https://github.com/JuliaData/DataFrames.jl/issues/2998),
+   [#3185](https://github.com/JuliaData/DataFrames.jl/pull/3185))
 * `filter` for `GroupedDataFrame` now accepts `ungroup` keyword argument
   ([#3021](https://github.com/JuliaData/DataFrames.jl/issues/3021))
 * Add special syntax for `eachindex`, `groupindices`, and `proprow`
@@ -65,6 +66,12 @@
   or older it is an in place operation.
   ([#3022](https://github.com/JuliaData/DataFrames.jl/pull/3022))
 
+## Deprecations
+
+* `allowduplicates` keyword argument in `unstack` is deprecated,
+  `combine` keyword argument should be used instead
+  ([#3185](https://github.com/JuliaData/DataFrames.jl/pull/3185))
+
 ## Internal changes
 
 * `DataFrame` is now a `mutable struct` and has three new fields
diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md
index e2974e400f..e3cde762b6 100755
--- a/docs/src/man/reshaping_and_pivoting.md
+++ b/docs/src/man/reshaping_and_pivoting.md
@@ -297,7 +297,7 @@ Id columns -- `RepeatedVector`
 This repeats the original columns N times where N is the number of columns stacked.
 
 To do aggregation, use the split-apply-combine functions in combination with
-`unstack` or use the `valuesfunction` keyword argument in `unstack`. Here is an example:
+`unstack` or use the `combine` keyword argument in `unstack`. Here is an example:
 
 ```jldoctest reshape
 julia> using Statistics
@@ -357,7 +357,7 @@ julia> unstack(agg, :variable, :Species, :vmean)
    4 │ PetalWidth         0.244            1.326           2.026
    5 │ id                25.5             75.5           125.5
 
-julia> unstack(d, :variable, :Species, :value, valuesfunction=mean)
+julia> unstack(d, :variable, :Species, :value, combine=mean)
 5×4 DataFrame
  Row │ variable     Iris-setosa  Iris-versicolor  Iris-virginica
      │ String       Float64?     Float64?         Float64?
diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index 9226366549..00ce50187d 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -215,20 +215,17 @@ end
 """
     unstack(df::AbstractDataFrame, rowkeys, colkey, value;
             renamecols::Function=identity, allowmissing::Bool=false,
-            allowduplicates::Bool=false, valuesfunction=nothing,
-            fill=missing, threads::Bool=true)
+            combine=only, fill=missing, threads::Bool=true)
     unstack(df::AbstractDataFrame, colkey, value;
             renamecols::Function=identity, allowmissing::Bool=false,
-            allowduplicates::Bool=false, valuesfunction=nothing,
-            fill=missing, threads::Bool=true)
+            combine=only, fill=missing, threads::Bool=true)
     unstack(df::AbstractDataFrame;
             renamecols::Function=identity, allowmissing::Bool=false,
-            allowduplicates::Bool=false, valuesfunction=nothing,
-            fill=missing, threads::Bool=true)
+            combine=only, fill=missing, threads::Bool=true)
 
 Unstack data frame `df`, i.e. convert it from long to wide format.
 
-Row and column keys will be ordered in the order of their first appearance.
+Row and column keys are ordered in the order of their first appearance.
 
 # Positional arguments
 - `df` : the AbstractDataFrame to be unstacked
@@ -246,27 +243,25 @@ Row and column keys will be ordered in the order of their first appearance.
   return the name of the column to be created (typically as a string or a
   `Symbol`). Duplicates in resulting names when converted to `Symbol` are not
   allowed. By default no transformation is performed.
-- `allowmissing`: if `false` (the default) then an error will be thrown if
+- `allowmissing`: if `false` (the default) then an error is thrown if
   `colkey` contains `missing` values; if `true` then a column referring to
-  `missing` value will be created.
-- `allowduplicates`: if `false` (the default) then an error an error will be
-  thrown if combination of `rowkeys` and `colkey` contains duplicate entries; if
-  `true` then the last encountered `value` will be retained;
-  this keyword argument is ignored if `valuesfunction` keyword argument is passed.
-- `valuesfunction`: if passed then `allowduplicates` is ignored and instead
-   the passed function will be called on a vector view containing all elements
-   for each combination of `rowkeys` and `colkey` present in the data.
+  `missing` value is created.
+- `combine`: if `only` (the default) then an error is thrown if combination
+  of `rowkeys` and `colkey` contains duplicate entries. Otherwise the passed
+  value must be a function that is called on a vector view containing all
+  elements for each combination of `rowkeys` and `colkey` present in the data.
 - `fill`: missing row/column combinations are filled with this value. The
   default is `missing`. If the `value` column is a `CategoricalVector` and
   `fill` is not `missing` then in order to keep unstacked value columns also
   `CategoricalVector` the `fill` must be passed as `CategoricalValue`
-- `threads`: whether `valuesfunction` may be run in separate tasks which
-  can execute in parallel (possibly being applied to multiple groups at the same time).
-  Whether or not tasks are actually spawned and their number are determined automatically.
-  Set to `false` if `valuesfunction` requires serial execution or is not thread-safe.
+- `threads`: whether `combine` function may be run in separate tasks which can
+  execute in parallel (possibly being applied to multiple groups at the same
+  time). Whether or not tasks are actually spawned and their number are
+  determined automatically. Set to `false` if `combine` requires serial
+  execution or is not thread-safe.
 
-Metadata: table-level `:note`-style metadata and column-level `:note`-style metadata
-for row keys columns are preserved.
+Metadata: table-level `:note`-style metadata and column-level `:note`-style
+metadata for row keys columns are preserved.
 
 # Examples
 
@@ -401,14 +396,14 @@ julia> df = DataFrame(cols=["a", "a", "b"], values=[1, 2, 4])
    2 │ a            2
    3 │ b            4
 
-julia> unstack(df, :cols, :values, valuesfunction=copy)
+julia> unstack(df, :cols, :values, combine=copy)
 1×2 DataFrame
  Row │ a        b
      │ Array…?  Array…?
 ─────┼──────────────────
    1 │ [1, 2]   [4]
 
-julia> unstack(df, :cols, :values, valuesfunction=sum)
+julia> unstack(df, :cols, :values, combine=sum)
 1×2 DataFrame
  Row │ a       b
      │ Int64?  Int64?
@@ -418,9 +413,13 @@ julia> unstack(df, :cols, :values, valuesfunction=sum)
 """
 function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex,
                  values::ColumnIndex; renamecols::Function=identity,
-                 allowmissing::Bool=false, allowduplicates::Bool=false,
-                 valuesfunction=nothing, fill=missing,
-                 threads::Bool=true)
+                 allowmissing::Bool=false,  allowduplicates::Bool=false,
+                 combine=only, fill=missing, threads::Bool=true)
+    if allowduplicates
+        Base.depwarn("allowduplicates keyword argument is deprecated. " *
+                     "Pass `combine=last` instead of allowduplicates=true.", :unstack)
+        combine = last
+    end
     # first make sure that rowkeys are unique and
     # normalize all selectors as a strings
     # if some of the selectors are wrong we will get an early error here
@@ -428,7 +427,7 @@ function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex,
     colkey = only(names(df, colkey))
     values = only(names(df, values))
 
-    if !isnothing(valuesfunction)
+    if combine !== only
         # potentially colkey can be also part of rowkeys so we need to do unique
         groupcols = unique!([rowkeys; colkey])
         @assert groupcols isa Vector{String}
@@ -441,60 +440,67 @@ function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex,
         end
 
         gdf = groupby(df, groupcols)
-        if check_aggregate(valuesfunction, df[!, values]) isa AbstractAggregate
-            # if valuesfunction function is AbstractAggregate
+        if check_aggregate(combine, df[!, values]) isa AbstractAggregate
+            # if combine function is AbstractAggregate
             # then we are sure it will return a scalar number so we can
             # leave it as is and be sure we use fast path in combine
-            agg_fun = valuesfunction
+            agg_fun = combine
         else
-            # in general valuesfunction function could return e.g. a vector,
+            # in general combine function could return e.g. a vector,
             # which would get expanded to multiple rows so we protect it with
             # Ref that will get unwrapped by combine
-            agg_fun = Ref∘valuesfunction
+            agg_fun = Ref∘combine
         end
-        df_op = combine(gdf, values => agg_fun => values_out,
-                        threads=threads)
+        df_op = DataFrames.combine(gdf, values => agg_fun => values_out,
+                                   threads=threads)
 
         group_rows = find_group_row(gdf)
         if !issorted(group_rows)
             df_op = df_op[sortperm(group_rows), :]
         end
-        # set allowduplicates to true as we should not have any duplicates now
-        # and allowduplicates=true is a bit faster
-        allowduplicates = true
+        # we should not have any duplicates in df_op now
+        noduplicates = true
     else
         df_op = df
         values_out = values
+        noduplicates = false
     end
 
     g_rowkey = groupby(df_op, rowkeys)
     g_colkey = groupby(df_op, colkey)
     valuecol = df_op[!, values_out]
     return _unstack(df_op, index(df_op)[rowkeys], index(df_op)[colkey], g_colkey,
-                    valuecol, g_rowkey, renamecols,
-                    allowmissing, allowduplicates, fill)
+                    valuecol, g_rowkey, renamecols, allowmissing, noduplicates, fill)
 end
 
 function unstack(df::AbstractDataFrame, colkey::ColumnIndex, values::ColumnIndex;
-                 renamecols::Function=identity,
-                 allowmissing::Bool=false, allowduplicates::Bool=false,
-                 valuesfunction=nothing, fill=missing,
-                 threads::Bool=true)
+                 renamecols::Function=identity, allowmissing::Bool=false,
+                  allowduplicates::Bool=false, combine=only, fill=missing,
+                  threads::Bool=true)
+    if allowduplicates
+        Base.depwarn("allowduplicates keyword argument is deprecated. " *
+                     "Pass `combine=last` instead of allowduplicates=true.", :unstack)
+        combine = last
+    end
     colkey_int = index(df)[colkey]
     value_int = index(df)[values]
     return unstack(df, Not(colkey_int, value_int), colkey_int, value_int,
             renamecols=renamecols, allowmissing=allowmissing,
-            allowduplicates=allowduplicates, valuesfunction=valuesfunction,
+            combine=combine,
             fill=fill, threads=threads)
 end
 
-unstack(df::AbstractDataFrame; renamecols::Function=identity,
-        allowmissing::Bool=false, allowduplicates::Bool=false,
-        valuesfunction=nothing, fill=missing,
-        threads::Bool=true) =
+function unstack(df::AbstractDataFrame; renamecols::Function=identity,
+                 allowmissing::Bool=false, allowduplicates::Bool=false,
+                 combine=only, fill=missing, threads::Bool=true)
+    if allowduplicates
+        Base.depwarn("allowduplicates keyword argument is deprecated. " *
+                     "Pass `combine=last` instead of allowduplicates=true.", :unstack)
+        combine = last
+    end
     unstack(df, :variable, :value, renamecols=renamecols, allowmissing=allowmissing,
-            allowduplicates=allowduplicates, valuesfunction=valuesfunction,
-            fill=fill, threads=threads)
+            combine=combine, fill=fill, threads=threads)
+end
 
 # we take into account the fact that idx, starts and ends are computed lazily
 # so we rather directly reference the gdf.groups
@@ -521,8 +527,7 @@ end
 function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Int},
                   colkey::Int, g_colkey::GroupedDataFrame,
                   valuecol::AbstractVector, g_rowkey::GroupedDataFrame,
-                  renamecols::Function, allowmissing::Bool,
-                  allowduplicates::Bool, fill)
+                  renamecols::Function, allowmissing::Bool, noduplicates::Bool, fill)
     rowref = g_rowkey.groups
     row_group_row_idxs = find_group_row(g_rowkey)
     Nrow = length(g_rowkey)
@@ -543,8 +548,8 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Int},
                            Nrow),
                      fill) for _ in 1:Ncol]
 
-    # use a separate path for allowduplicates to reduce memory use and increase speed
-    if allowduplicates
+    # use a separate path for noduplicates to reduce memory use and increase speed
+    if noduplicates
         for (k, (row_id, col_id, val)) in enumerate(zip(rowref, colref, valuecol))
             unstacked_val[col_id][row_id] = val
         end
@@ -556,7 +561,8 @@ function _unstack(df::AbstractDataFrame, rowkeys::AbstractVector{Int},
                 bad_var = colref_map[col_id]
                 throw(ArgumentError("Duplicate entries in unstack at row $k for key "*
                                     "$bad_key and variable $bad_var. " *
-                                    "Pass allowduplicates=true to allow them."))
+                                    "Pass `combine` keyword argument to specify " *
+                                    "how they should be handled."))
             end
             unstacked_val[col_id][row_id] = val
             mask_filled[row_id, col_id] = true
diff --git a/test/deprecated.jl b/test/deprecated.jl
index 6e2035564f..beaba2770b 100644
--- a/test/deprecated.jl
+++ b/test/deprecated.jl
@@ -140,4 +140,19 @@ end
     @test df == DataFrame(x=1, y=1:4)
 end
 
+@testset "deprecated allowduplicates in unstack" begin
+    df = DataFrame(row=[1, 1, 2, 2], variable=["x", "x", "y", "y"], value=1:4)
+    @test_throws ArgumentError unstack(df, :row, :variable, :value)
+    @test unstack(df, :row, :variable, :value, allowduplicates=true) ≅
+          DataFrame(row=1:2, x=[2, missing], y=[missing, 4])
+    @test unstack(df, :variable, :value, allowduplicates=true) ≅
+          DataFrame(row=1:2, x=[2, missing], y=[missing, 4])
+    @test unstack(df, allowduplicates=true) ≅
+          DataFrame(row=1:2, x=[2, missing], y=[missing, 4])
+    @test unstack(df, :variable, :value, allowduplicates=true) ≅
+          unstack(df, :variable, :value, combine=last)
+    @test unstack(df, :row, :variable, :value, allowduplicates=true) ≅
+          unstack(df, :row, :variable, :value, combine=last)
+end
+
 end # module
diff --git a/test/metadata.jl b/test/metadata.jl
index df02346169..b9cccfc7f9 100644
--- a/test/metadata.jl
+++ b/test/metadata.jl
@@ -1327,7 +1327,7 @@ end
     @test check_allnotemetadata(res)
     @test getfield(res, :metadata) === nothing
     @test getfield(res, :colmetadata) === nothing
-    res = unstack(long, :a, :variable, :value, valuesfunction=copy)
+    res = unstack(long, :a, :variable, :value, combine=copy)
     @test check_allnotemetadata(res)
     @test getfield(res, :metadata) === nothing
     @test getfield(res, :colmetadata) === nothing
@@ -1361,7 +1361,7 @@ end
     @test isempty(colmetadatakeys(res, :c))
     @test isempty(colmetadatakeys(res, :d))
 
-    res = unstack(long, :a, :variable, :value, valuesfunction=copy)
+    res = unstack(long, :a, :variable, :value, combine=copy)
     @test check_allnotemetadata(res)
     @test collect(metadatakeys(res)) == ["name"]
     @test metadata(res, "name") == "empty"
diff --git a/test/multithreading.jl b/test/multithreading.jl
index 65ab239820..a1826e76da 100644
--- a/test/multithreading.jl
+++ b/test/multithreading.jl
@@ -236,16 +236,13 @@ end
     l = Ref(0)
     m = Ref(0)
     n = Ref(0)
-    unstack(df,
-            allowduplicates=true, valuesfunction=x -> (l[] += 1),
+    unstack(df, combine=x -> (l[] += 1),
             threads=false) ==
             DataFrame(id=1:3, a=[1, 3, 5], b=[2, 4, 6]) ==
-    unstack(df, :variable, :value,
-            allowduplicates=true, valuesfunction=x -> (m[] += 1),
+    unstack(df, :variable, :value, combine=x -> (m[] += 1),
             threads=false) ==
             DataFrame(id=1:3, a=[1, 3, 5], b=[2, 4, 6]) ==
-    unstack(df, :id, :variable, :value,
-            allowduplicates=true, valuesfunction=x -> (n[] += 1),
+    unstack(df, :id, :variable, :value, combine=x -> (n[] += 1),
             threads=false) ==
             DataFrame(id=1:3, a=[1, 3, 5], b=[2, 4, 6])
 
diff --git a/test/reshape.jl b/test/reshape.jl
index f2676b028a..f766f0bfaa 100644
--- a/test/reshape.jl
+++ b/test/reshape.jl
@@ -144,8 +144,8 @@ end
                    variable=["a", "b", "a", "b"], value=[3, 4, 5, 6])
     @test_throws ArgumentError unstack(df, :id, :variable, :value)
     @test_throws ArgumentError unstack(df, :variable, :value)
-    a = unstack(df, :id, :variable, :value, allowduplicates=true)
-    b = unstack(df, :variable, :value, allowduplicates=true)
+    a = unstack(df, :id, :variable, :value, combine=last)
+    b = unstack(df, :variable, :value, combine=last)
     @test a ≅ DataFrame(id=[1, 2], a=[5, missing], b=[missing, 6])
     @test b ≅ DataFrame(id=[1, 2], id2=[1, 2], a=[5, missing], b=[missing, 6])
 
@@ -157,8 +157,8 @@ end
     df = DataFrame(variable=["x", "x"], value=[missing, missing], id=[1, 1])
     @test_throws ArgumentError unstack(df, :variable, :value)
     @test_throws ArgumentError unstack(df, :id, :variable, :value)
-    @test unstack(df, :variable, :value, allowduplicates=true) ≅ DataFrame(id=1, x=missing)
-    @test unstack(df, :id, :variable, :value, allowduplicates=true) ≅ DataFrame(id=1, x=missing)
+    @test unstack(df, :variable, :value, combine=last) ≅ DataFrame(id=1, x=missing)
+    @test unstack(df, :id, :variable, :value, combine=last) ≅ DataFrame(id=1, x=missing)
 end
 
 @testset "missing values in colkey" begin
@@ -546,9 +546,9 @@ end
     df[4, 1:2] .= 1
     @test_throws ArgumentError unstack(df, :id, :var, :val)
     @test_throws ArgumentError unstack(df, [:id, :id2], :var, :val)
-    @test unstack(df, :id, :var, :val, allowduplicates=true) ≅
+    @test unstack(df, :id, :var, :val, combine=last) ≅
           DataFrame(id=1:3, a=[4, missing, 7], b=2:3:8, c=3:3:9)
-    @test unstack(df, [:id, :id2], :var, :val, allowduplicates=true) ≅
+    @test unstack(df, [:id, :id2], :var, :val, combine=last) ≅
           DataFrame(id=1:3, id2=1:3, a=[4, missing, 7], b=2:3:8, c=3:3:9)
 
     df = DataFrame(id=repeat(1:3, inner=3),
@@ -768,62 +768,62 @@ end
 
     df = DataFrame(x=[:one, :two, :one], y=[1, 2, 3])
     @test_throws ArgumentError unstack(df, :x, :y)
-    @test unstack(df, :x, :y, allowduplicates=true) == DataFrame(one=3, two=2)
-    @test unstack(df, :x, :y, valuesfunction=identity) ==
+    @test unstack(df, :x, :y, combine=last) == DataFrame(one=3, two=2)
+    @test unstack(df, :x, :y, combine=identity) ==
           DataFrame(one=[[1, 3]], two=[[2]])
-    @test unstack(df, :x, :y, valuesfunction=last) ==
+    @test unstack(df, :x, :y, combine=last) ==
           DataFrame(one=3, two=2)
-    @test unstack(df, :x, :y, valuesfunction=first) ==
+    @test unstack(df, :x, :y, combine=first) ==
           DataFrame(one=1, two=2)
-    @test unstack(df, :x, :y, valuesfunction=length) ==
+    @test unstack(df, :x, :y, combine=length) ==
           DataFrame(one=2, two=1)
 end
 
-@testset "valuesfunction" begin
+@testset "combine kwarg" begin
     df = DataFrame(rowid=[1, 1, 1, 1, 2, 2], colid=[1, 1, 2, 2, 3, 3], values=1:6)
     @test_throws ArgumentError unstack(df, :rowid, :colid, :values)
-    @test unstack(df, :rowid, :colid, :values, allowduplicates=true) ≅
+    @test unstack(df, :rowid, :colid, :values, combine=last) ≅
           DataFrame("rowid" => 1:2, "1" => [2, missing],
                     "2" => [4, missing], "3" => [missing, 6])
-    @test unstack(df, :rowid, :colid, :values, allowduplicates=true, fill=0) ==
+    @test unstack(df, :rowid, :colid, :values, combine=last, fill=0) ==
           DataFrame("rowid" => 1:2, "1" => [2, 0],
                     "2" => [4, 0], "3" => [0, 6])
-    @test unstack(df, :rowid, :colid, :values, valuesfunction=identity) ≅
+    @test unstack(df, :rowid, :colid, :values, combine=identity) ≅
           DataFrame("rowid" => 1:2, "1" => [1:2, missing],
                     "2" => [3:4, missing], "3" => [missing, 5:6])
     @test unstack(df, :rowid, :colid, :values,
-                  valuesfunction=identity, fill=Int[]) ==
+                  combine=identity, fill=Int[]) ==
           DataFrame("rowid" => 1:2, "1" => [1:2, []],
                     "2" => [3:4, []], "3" => [[], 5:6])
-    @test unstack(df, :rowid, :colid, :values, valuesfunction=sum) ≅
+    @test unstack(df, :rowid, :colid, :values, combine=sum) ≅
           DataFrame("rowid" => 1:2, "1" => [3, missing],
                     "2" => [7, missing], "3" => [missing, 11])
-    @test unstack(df, :rowid, :colid, :values, valuesfunction=sum, fill=0) ==
+    @test unstack(df, :rowid, :colid, :values, combine=sum, fill=0) ==
           DataFrame("rowid" => 1:2, "1" => [3, 0],
                     "2" => [7, 0], "3" => [0, 11])
-    @test unstack(df, :rowid, :colid, :values, valuesfunction=sum, fill="X") ==
+    @test unstack(df, :rowid, :colid, :values, combine=sum, fill="X") ==
           DataFrame("rowid" => 1:2, "1" => [3, "X"],
                     "2" => [7, "X"], "3" => ["X", 11])
-    @test unstack(df, :rowid, :colid, :values, valuesfunction=length) ≅
+    @test unstack(df, :rowid, :colid, :values, combine=length) ≅
           DataFrame("rowid" => 1:2, "1" => [2, missing],
                     "2" => [2, missing], "3" => [missing, 2])
-    @test unstack(df, :rowid, :colid, :values, valuesfunction=length, fill=0) ==
+    @test unstack(df, :rowid, :colid, :values, combine=length, fill=0) ==
           DataFrame("rowid" => 1:2, "1" => [2, 0],
                     "2" => [2, 0], "3" => [0, 2])
     @test unstack(df, :rowid, :colid, :values,
-                  valuesfunction=x -> isempty(x) ? missing : length(x)) ≅
+                  combine=x -> isempty(x) ? missing : length(x)) ≅
           DataFrame("rowid" => 1:2, "1" => [2, missing],
                     "2" => [2, missing], "3" => [missing, 2])
     @test unstack(df, :rowid, :colid, :values,
-                  valuesfunction=x -> isempty(x) ? missing : x) ≅
+                  combine=x -> isempty(x) ? missing : x) ≅
           DataFrame("rowid" => 1:2, "1" => [1:2, missing],
                     "2" => [3:4, missing], "3" => [missing, 5:6])
 
     df = DataFrame(rowid=[2, 2, 2, 2, 1, 1], colid=[2, 2, 1, 1, 3, 3], values=1:6)
-    @test unstack(df, :rowid, :colid, :values, valuesfunction=identity) ≅
+    @test unstack(df, :rowid, :colid, :values, combine=identity) ≅
           DataFrame("rowid" => [2,1], "2" => [1:2, missing],
                     "1" => [3:4, missing], "3" => [missing, 5:6])
-    @test unstack(df, :rowid, :colid, :values, valuesfunction=identity, fill="X") ==
+    @test unstack(df, :rowid, :colid, :values, combine=identity, fill="X") ==
           DataFrame("rowid" => [2,1], "2" => [1:2, "X"],
                     "1" => [3:4, "X"], "3" => ["X", 5:6])
 
@@ -831,24 +831,24 @@ end
     # check correctness of row and column ordering
     for _ in 1:10
         df = DataFrame(rowid=rand(1:10, 50), colid=rand(1:10, 50), values=1:50)
-        res = unstack(df, :rowid, :colid, :values, valuesfunction=last)
-        @test res ≅ unstack(df, :rowid, :colid, :values, allowduplicates=true)
+        res = unstack(df, :rowid, :colid, :values, combine=last)
+        @test res ≅ unstack(df, :rowid, :colid, :values, combine=last)
         @test res.rowid == unique(df.rowid)
         @test names(res, Not(1)) == string.(unique(df.colid))
-        res = unstack(df, :rowid, :colid, :values, valuesfunction=last, fill=0)
-        @test res ≅ unstack(df, :rowid, :colid, :values, allowduplicates=true, fill=0)
+        res = unstack(df, :rowid, :colid, :values, combine=last, fill=0)
+        @test res ≅ unstack(df, :rowid, :colid, :values, combine=last, fill=0)
         @test res.rowid == unique(df.rowid)
         @test names(res, Not(1)) == string.(unique(df.colid))
 
         df.rowid=categorical(df.rowid, levels=shuffle(unique(df.rowid)))
         df.colid=categorical(df.colid, levels=shuffle(unique(df.colid)))
-        res = unstack(df, :rowid, :colid, :values, valuesfunction=last)
-        @test res ≅ unstack(df, :rowid, :colid, :values, allowduplicates=true)
+        res = unstack(df, :rowid, :colid, :values, combine=last)
+        @test res ≅ unstack(df, :rowid, :colid, :values, combine=last)
         @test unwrap.(res.rowid) == unique(df.rowid)
         @test names(res, Not(1)) == string.(unique(df.colid))
-        res = unstack(df, :rowid, :colid, :values, valuesfunction=last, fill=0)
+        res = unstack(df, :rowid, :colid, :values, combine=last, fill=0)
         @test res ≅
-            unstack(df, :rowid, :colid, :values, allowduplicates=true, fill=0)
+            unstack(df, :rowid, :colid, :values, combine=last, fill=0)
         @test unwrap.(res.rowid) == unique(df.rowid)
         @test names(res, Not(1)) == string.(unique(df.colid))
     end
@@ -893,7 +893,7 @@ end
         for c in (:a, :b, :c, "a", "b", "c", 1, 2, 3)
             for v in (:a, :b, :c, "a", "b", "c", 1, 2, 3)
                 @test unstack(df, r, c, v) ≅
-                      broadcast(x -> x isa Vector ? only(x) : x, unstack(df, r, c, v, valuesfunction=copy))
+                      broadcast(x -> x isa Vector ? only(x) : x, unstack(df, r, c, v, combine=copy))
             end
         end
     end
@@ -906,7 +906,7 @@ end
                     d=["a", missing, missing],
                     e=[missing, "b", missing],
                     f=[missing, missing, "c"])
-    @test unstack(df, 3, 2, 1, valuesfunction=only) ≅
+    @test unstack(df, 3, 2, 1, combine=only) ≅
           DataFrame(values_out_3490283_11=["g", "h", "i"],
                     d=["a", missing, missing],
                     e=[missing, "b", missing],

From 9bbe6bcdead35f47219d6bb6b8190e66df3d0861 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sun, 2 Oct 2022 11:36:12 +0200
Subject: [PATCH 3/5] Update NEWS.md

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 16e7a44079..156924509e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -68,7 +68,7 @@
 
 # Deprecations
 
-* `allowduplicates` keyword argument in `unstack` is deprecated, use
+* `allowduplicates` keyword argument in `unstack` is deprecated,
   `combine` should be used instead
   ([#3185](https://github.com/JuliaData/DataFrames.jl/pull/3185))
 

From d651499ccba118a7cc83005ec3d6e8a5996c3167 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sun, 2 Oct 2022 11:39:30 +0200
Subject: [PATCH 4/5] improve docstrings

---
 NEWS.md                          | 2 +-
 src/abstractdataframe/reshape.jl | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 156924509e..3a6d73c9b4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -69,7 +69,7 @@
 # Deprecations
 
 * `allowduplicates` keyword argument in `unstack` is deprecated,
-  `combine` should be used instead
+  `combine` keyword argument should be used instead
   ([#3185](https://github.com/JuliaData/DataFrames.jl/pull/3185))
 
 ## Internal changes
diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index 00ce50187d..da717359fd 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -263,6 +263,12 @@ Row and column keys are ordered in the order of their first appearance.
 Metadata: table-level `:note`-style metadata and column-level `:note`-style
 metadata for row keys columns are preserved.
 
+# Deprecations
+
+- `allowduplicates` keyword argument is deprecated; use the `combine` keyword
+  argument instead; `allowduplicates=true` is equivalent to `combine=last`
+  and `allowduplicates=false` is equivalent to `combine=only` (the default).
+
 # Examples
 
 ```jldoctest

From 91f3a0967ed242ebe74852df2a05996a7f7584c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Mon, 3 Oct 2022 08:25:15 +0200
Subject: [PATCH 5/5] Update src/abstractdataframe/reshape.jl

---
 src/abstractdataframe/reshape.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index da717359fd..96a3ab9aeb 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -423,7 +423,7 @@ function unstack(df::AbstractDataFrame, rowkeys, colkey::ColumnIndex,
                  combine=only, fill=missing, threads::Bool=true)
     if allowduplicates
         Base.depwarn("allowduplicates keyword argument is deprecated. " *
-                     "Pass `combine=last` instead of allowduplicates=true.", :unstack)
+                     "Pass `combine=last` instead of `allowduplicates=true`.", :unstack)
         combine = last
     end
     # first make sure that rowkeys are unique and