Skip to content

Commit

Permalink
Review (WillAyd)
Browse files Browse the repository at this point in the history
  • Loading branch information
h-vetinari committed Oct 9, 2018
1 parent 9761747 commit 5cf6941
Showing 1 changed file with 21 additions and 21 deletions.
42 changes: 21 additions & 21 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@
_shared_docs = dict()


def cat_core(list_of_columns, sep):
def cat_core(all_cols, sep):
"""
Auxiliary function for :meth:`str.cat`
Parameters
----------
list_of_columns : list of numpy arrays
List of arrays to be concatenated with sep;
these arrays may not contain NaNs!
all_cols : two-dimensional numpy array
array of columns to be concatenated with sep;
this array may not contain NaNs!
sep : string
The separator string for concatenating the columns
Expand All @@ -54,9 +54,12 @@ def cat_core(list_of_columns, sep):
nd.array
The concatenation of list_of_columns with sep
"""
list_of_columns = np.split(all_cols, all_cols.shape[1], axis=1)
list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
list_with_sep[::2] = list_of_columns
return np.sum(list_with_sep, axis=0)
# np.split splits into arrays of shape (N, 1); NOT (N,)
# need to reduce dimensionality of result
return np.sum(list_with_sep, axis=0)[:, 0]


def _na_map(f, arr, na_result=np.nan, dtype=object):
Expand Down Expand Up @@ -2239,21 +2242,21 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
"'outer'|'inner'|'right'`. The future default will "
"be `join='left'`.", FutureWarning, stacklevel=2)

# if join is None, _get_series_list already aligned indexes
join = 'left' if join is None else join
# concatenate others into DataFrame; need to add keys for uniqueness in
# case of duplicate columns (for join is None, all indexes are already
# the same after _get_series_list, which forces alignment in this case)
others = concat(others, axis=1,
join=(join if join == 'inner' else 'outer'),
keys=range(len(others)), copy=False)

# align if required
if any(not data.index.equals(x.index) for x in others):
# Need to add keys for uniqueness in case of duplicate columns
others = concat(others, axis=1,
join=(join if join == 'inner' else 'outer'),
keys=range(len(others)), copy=False)
if not data.index.equals(others.index):
data, others = data.align(others, join=join)
others = [others[x] for x in others] # again list of Series

all_cols = [ensure_object(x) for x in [data] + others]
na_masks = np.array([isna(x) for x in all_cols])
union_mask = np.logical_or.reduce(na_masks, axis=0)
# collect all columns
all_cols = ensure_object(concat([data, others], axis=1, copy=False))
na_masks = isna(all_cols)
union_mask = np.logical_or.reduce(na_masks, axis=1)

if na_rep is None and union_mask.any():
# no na_rep means NaNs for all rows where any column has a NaN
Expand All @@ -2262,13 +2265,10 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
np.putmask(result, union_mask, np.nan)

not_masked = ~union_mask
result[not_masked] = cat_core([x[not_masked] for x in all_cols],
sep)
result[not_masked] = cat_core(all_cols[not_masked], sep)
elif na_rep is not None and union_mask.any():
# fill NaNs with na_rep in case there are actually any NaNs
all_cols = [np.where(nm, na_rep, col)
for nm, col in zip(na_masks, all_cols)]
result = cat_core(all_cols, sep)
result = cat_core(np.where(na_masks, na_rep, all_cols), sep)
else:
# no NaNs - can just concatenate
result = cat_core(all_cols, sep)
Expand Down

0 comments on commit 5cf6941

Please sign in to comment.