Merge pull request #1 from tshort/make_unique
Added make_unique() and changed cbind to use it.
HarlanH committed Jul 14, 2012
2 parents 70157ee + c4fa626 commit f89ef44
Showing 1 changed file: src/data.jl (33 additions, 5 deletions)
@@ -1342,11 +1342,11 @@ function cbind!(df1::DataFrame, df2::DataFrame)
     # TODO fix this
     ## newcolnames = convert(Vector{CT1}, df2.colnames)
     newcolnames = colnames(df2)
-    # and if there are no duplicate column names
-    if !nointer(colnames(df1), newcolnames)
-        error("can't cbind dataframes with overlapping column names!")
-    end
-    df1.colindex = Index(concat(colnames(df1), colnames(df2)))
+    ## # and if there are no duplicate column names
+    ## if !nointer(colnames(df1), newcolnames)
+    ##     error("can't cbind dataframes with overlapping column names!")
+    ## end
+    df1.colindex = Index(make_unique(concat(colnames(df1), colnames(df2))))
     df1.columns = [df1.columns, df2.columns]
     df1
 end
@@ -1757,6 +1757,34 @@ function _uniqueofsorted(x::Vector)
     x[idx]
 end
 
+function make_unique{S<:ByteString}(names::Vector{S})
+    x = Index()
+    names = copy(names)
+    dups = Int[]
+    for i in 1:length(names)
+        if has(x, names[i])
+            push(dups, i)
+        else
+            push(x, names[i])
+        end
+    end
+    for i in dups
+        nm = names[i]
+        newnm = nm
+        k = 1
+        while true
+            newnm = "$(nm)_$k"
+            if !has(x, newnm)
+                push(x, newnm)
+                break
+            end
+            k += 1
+        end
+        names[i] = newnm
+    end
+    names
+end
+
 unique(pd::PooledDataVec) = pd.pool
 sort(pd::PooledDataVec) = pd[order(pd)]
 order(pd::PooledDataVec) = groupsort_indexer(pd)[1]

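For reference, a quick sketch of the intended behavior (not part of the commit; the sample names below are hypothetical, and push/has follow the 2012-era Julia API used in the diff):

# make_unique keeps the first occurrence of a name and suffixes later duplicates;
# it works on a copy, so the input vector is left unchanged.
make_unique(["a", "b", "a", "a"])    # expected: ["a", "b", "a_1", "a_2"]

# In cbind!, overlapping column names from df2 are now renamed instead of raising
# "can't cbind dataframes with overlapping column names!". For example,
# concat(["x", "y"], ["x"]) gives ["x", "y", "x"], which make_unique turns into ["x", "y", "x_1"].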