JuliaData · nalimilan · Sep 3, 2019 · Sep 3, 2019 · Sep 3, 2019 · Sep 3, 2019
diff --git a/src/groupeddataframe/grouping.jl b/src/groupeddataframe/grouping.jl
@@ -212,8 +212,10 @@ If the first argument is a vector, tuple or named tuple of such pairs, each pair
 handled as described above. If a named tuple, field names are used to name
 each generated column.
 
-If the first argument is a callable, it is passed a `SubDataFrame` view for each group,
+If the first argument is a callable `f`, it is passed a [`SubDataFrame`](@ref) view for each group,
 and the returned `DataFrame` then consists of the returned rows plus the grouping columns.
+If the returned data frame contains columns with the same names as the grouping columns,
+they are required to be equal.
 Note that this second form is much slower than the first one due to type instability.
 
 `f` can return a single value, a row or multiple rows. The type of the returned value
@@ -297,7 +299,16 @@ See [`by`](@ref) for more examples.
 function Base.map(f::Any, gd::GroupedDataFrame)
     if length(gd) > 0
         idx, valscat = _combine(f, gd)
-        parent = hcat!(gd.parent[idx, gd.cols], valscat, makeunique=true)
+        keys = _names(gd.parent)[gd.cols]
+        for key in keys
+            if hasproperty(valscat, key) &&
+               !isequal(valscat[!, key], view(gd.parent[!, key], idx))
+               throw(ArgumentError("column :$key in returned data frame " *
+                                   "is not equal to grouping key :$key"))
+            end
+        end
+        parent = hcat!(gd.parent[idx, gd.cols],
+                       without(valscat, intersect(keys, _names(valscat))))
         if length(idx) == 0
             return GroupedDataFrame(parent, collect(1:length(gd.cols)), idx,
                                     Int[], Int[], Int[])
@@ -343,6 +354,8 @@ views into these columns.
 
 If the last argument is a callable `f`, it is passed a [`SubDataFrame`](@ref) view for each group,
 and the returned `DataFrame` then consists of the returned rows plus the grouping columns.
+If the returned data frame contains columns with the same names as the grouping columns,
+they are required to be equal.
 Note that this second form is much slower than the first one due to type instability.
 A method is defined with `f` as the first argument, so do-block
 notation can be used.
@@ -435,7 +448,16 @@ of `combine(map(f, groupby(df, cols)))`.
 function combine(f::Any, gd::GroupedDataFrame)
     if length(gd) > 0
         idx, valscat = _combine(f, gd)
-        return hcat!(gd.parent[idx, gd.cols], valscat, makeunique=true)
+        keys = _names(gd.parent)[gd.cols]
+        for key in keys
+            if hasproperty(valscat, key) &&
+               !isequal(valscat[!, key], view(gd.parent[!, key], idx))
+               throw(ArgumentError("column :$key in returned data frame " *
+                                   "is not equal to grouping key :$key"))
+            end
+        end
+        return hcat!(gd.parent[idx, gd.cols],
+                     without(valscat, intersect(keys, _names(valscat))))
     else
         return gd.parent[1:0, gd.cols]
     end
@@ -948,6 +970,8 @@ views into these columns.
 
 If the last argument is a callable `f`, it is passed a [`SubDataFrame`](@ref) view for each group,
 and the returned `DataFrame` then consists of the returned rows plus the grouping columns.
+If the returned data frame contains columns with the same names as the grouping columns,
+they are required to be equal.
 Note that this second form is much slower than the first one due to type instability.
 A method is defined with `f` as the first argument, so do-block
 notation can be used.

diff --git a/test/grouping.jl b/test/grouping.jl
@@ -104,8 +104,8 @@ end
     for cols in ([:a, :b], [:b, :a], [:a, :c], [:c, :a],
                  [1, 2], [2, 1], [1, 3], [3, 1],
                  [true, true, false, false], [true, false, true, false])
-        colssym = names(df[:, cols])
-        hcatdf = hcat(df[:, cols], df, makeunique=true)
+        colssym = names(df[!, cols])
+        hcatdf = hcat(df[!, cols], df[!, Not(cols)])
         nms = names(hcatdf)
         res = unique(df[:, cols])
         res.xmax = [maximum(df[(df[!, colssym[1]] .== a) .& (df[!, colssym[2]] .== b), :x])
@@ -163,7 +163,7 @@ end
         df_comb = combine(identity, gd)
         @test sort(df_comb, colssym) == shcatdf
         df_ref = DataFrame(gd)
-        @test sort(hcat(df_ref[:, cols], df_ref, makeunique=true), colssym) == shcatdf
+        @test sort(hcat(df_ref[!, cols], df_ref[!, Not(cols)]), colssym) == shcatdf
         @test df_ref.x == df_comb.x
         @test combine(f1, gd) == res
         @test combine(f2, gd) == res
@@ -183,7 +183,7 @@ end
         end
         @test combine(identity, gd) == shcatdf
         df_ref = DataFrame(gd)
-        @test hcat(df_ref[:, cols], df_ref, makeunique=true) == shcatdf
+        @test hcat(df_ref[!, cols], df_ref[!, Not(cols)]) == shcatdf
         @test combine(f1, gd) == sres
         @test combine(f2, gd) == sres
         @test rename(combine(f3, gd), :x1 => :xmax) == sres
@@ -342,7 +342,7 @@ end
 
     # Test function returning DataFrameRow
     res = by(d -> DataFrameRow(d, 1, :), df, :x)
-    @test res == DataFrame(x=df.x, x_1=df.x, y=df.y)
+    @test res == DataFrame(x=df.x, y=df.y)
 
     # Test function returning Tuple
     res = by(d -> (sum(d.y),), df, :x)
@@ -893,6 +893,25 @@ Base.isless(::TestType, ::TestType) = false
     end
 end
 
+@testset "combine and map with columns named like grouping keys" begin
+    df = DataFrame(x=["a", "a", "b", missing], y=1:4)
+    gd = groupby(df, :x)
+    @test combine(identity, gd) ≅ df
+    @test combine(d -> d[:, [2, 1]], gd) ≅ df
+    @test_throws ArgumentError combine(f -> DataFrame(x=["a", "b"], z=[1, 1]), gd)
+    @test map(identity, gd) ≅ gd
+    @test map(d -> d[:, [2, 1]], gd) ≅ gd
+    @test_throws ArgumentError map(f -> DataFrame(x=["a", "b"], z=[1, 1]), gd)
+
+    gd = groupby(df, :x, skipmissing=true)
+    @test combine(identity, gd) == df[1:3, :]
+    @test combine(d -> d[:, [2, 1]], gd) == df[1:3, :]
+    @test_throws ArgumentError combine(f -> DataFrame(x=["a", "b"], z=[1, 1]), gd)
+    @test map(identity, gd) == gd
+    @test map(d -> d[:, [2, 1]], gd) == gd
+    @test_throws ArgumentError map(f -> DataFrame(x=["a", "b"], z=[1, 1]), gd)
+end
+
 @testset "iteration protocol" begin
     gd = groupby_checked(DataFrame(A = [:A, :A, :B, :B], B = 1:4), :A)
     for v in gd