From c4fa626cfcefeb06f01d3d4da5af4a48cc6cfbf2 Mon Sep 17 00:00:00 2001
From: Tom Short
Date: Fri, 13 Jul 2012 14:42:10 -0400
Subject: [PATCH] Added make_unique() and changed cbind to use it.

---
 src/data.jl | 45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/src/data.jl b/src/data.jl
index db613c300a..036f263ea1 100644
--- a/src/data.jl
+++ b/src/data.jl
@@ -1342,11 +1342,9 @@ function cbind!(df1::DataFrame, df2::DataFrame)
     # TODO fix this
     ## newcolnames = convert(Vector{CT1}, df2.colnames)
     newcolnames = colnames(df2)
-    # and if there are no duplicate column names
-    if !nointer(colnames(df1), newcolnames)
-        error("can't cbind dataframes with overlapping column names!")
-    end
-    df1.colindex = Index(concat(colnames(df1), colnames(df2)))
+    # Overlapping column names are allowed: make_unique() renames any repeat
+    # by appending "_1", "_2", ... so the combined index has no duplicates.
+    df1.colindex = Index(make_unique(concat(colnames(df1), newcolnames)))
     df1.columns = [df1.columns, df2.columns]
     df1
 end
@@ -1757,6 +1755,43 @@ function _uniqueofsorted(x::Vector)
     x[idx]
 end
 
+# Return a copy of `names` in which every repeated entry has been renamed so
+# that all entries are distinct. The first occurrence of a name is kept as
+# is; each later occurrence becomes "<name>_<k>", using the smallest k >= 1
+# whose result is not already taken. The input vector is not modified.
+#
+#   make_unique(["a", "b", "a", "a"])  # => ["a", "b", "a_1", "a_2"]
+function make_unique{S<:ByteString}(names::Vector{S})
+    seen = Index()          # names taken so far; relies on has()/push() for Index
+    names = copy(names)     # work on a copy so the caller's vector is untouched
+    dups = Int[]            # positions of repeated names, fixed up below
+    # Pass 1: register every first occurrence before renaming anything, so a
+    # generated name can never collide with an original name further along.
+    for i in 1:length(names)
+        if has(seen, names[i])
+            push(dups, i)
+        else
+            push(seen, names[i])
+        end
+    end
+    # Pass 2: give each repeat the first free "<name>_<k>" suffix.
+    for i in dups
+        nm = names[i]
+        newnm = nm          # initialised here so it is defined after the while loop
+        k = 1
+        while true
+            newnm = "$(nm)_$k"
+            if !has(seen, newnm)
+                push(seen, newnm)
+                break
+            end
+            k += 1
+        end
+        names[i] = newnm
+    end
+    names
+end
+
 unique(pd::PooledDataVec) = pd.pool
 sort(pd::PooledDataVec) = pd[order(pd)]
 order(pd::PooledDataVec) = groupsort_indexer(pd)[1]