From c4fa626cfcefeb06f01d3d4da5af4a48cc6cfbf2 Mon Sep 17 00:00:00 2001
From: Tom Short <t.short@ieee.org>
Date: Fri, 13 Jul 2012 14:42:10 -0400
Subject: [PATCH] Added make_unique() and changed cbind to use it.

---
 src/data.jl | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/src/data.jl b/src/data.jl
index db613c300a..036f263ea1 100644
--- a/src/data.jl
+++ b/src/data.jl
@@ -1342,11 +1342,11 @@ function cbind!(df1::DataFrame, df2::DataFrame)
     # TODO fix this
     ## newcolnames = convert(Vector{CT1}, df2.colnames)
     newcolnames = colnames(df2)
-    # and if there are no duplicate column names
-    if !nointer(colnames(df1), newcolnames)
-        error("can't cbind dataframes with overlapping column names!")
-    end
-    df1.colindex = Index(concat(colnames(df1), colnames(df2)))
+    ## # and if there are no duplicate column names
+    ## if !nointer(colnames(df1), newcolnames)
+    ##     error("can't cbind dataframes with overlapping column names!")
+    ## end
+    df1.colindex = Index(make_unique(concat(colnames(df1), colnames(df2))))
     df1.columns = [df1.columns, df2.columns]
     df1
 end
@@ -1757,6 +1757,34 @@ function _uniqueofsorted(x::Vector)
     x[idx]
 end
 
+# Return a copy of `names` in which every repeated entry gets a "_k" suffix.
+function make_unique{S<:ByteString}(names::Vector{S})
+    seen = Index()
+    result = copy(names)
+    repeated = Int[]
+    # Pass 1: register each first occurrence; note the positions of repeats.
+    for pos = 1:length(result)
+        if !has(seen, result[pos])
+            push(seen, result[pos])
+        else
+            push(repeated, pos)
+        end
+    end
+    # Pass 2: for each repeat, try nm_1, nm_2, ... until one is not in `seen`.
+    for pos in repeated
+        nm = result[pos]
+        k = 1
+        candidate = "$(nm)_$k"
+        while has(seen, candidate)
+            k += 1
+            candidate = "$(nm)_$k"
+        end
+        push(seen, candidate)
+        result[pos] = candidate
+    end
+    result
+end
+
 unique(pd::PooledDataVec) = pd.pool
 sort(pd::PooledDataVec) = pd[order(pd)]
 order(pd::PooledDataVec) = groupsort_indexer(pd)[1]