Skip to content

Commit

Permalink
use numpy indexing in cdap.make_interactions
Browse files: browse the repository at this point in the history
Once again we get significant speedups by working with NumPy arrays
instead of Pandas structures.
  • Loading branch information
jiffyclub committed May 5, 2015
1 parent 43a13bc commit 528c2ce
Showing 1 changed file with 27 additions and 9 deletions.
36 changes: 27 additions & 9 deletions activitysim/cdap/cdap.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,24 +48,42 @@ def make_interactions(people, hh_id_col, p_type_col):
three_fmt = '{}{}{}'.format
two = []
three = []
two_perm_cache = {}
three_combo_cache = {}

for hh_id, df in people.groupby(hh_id_col, sort=False):
hh_size = len(df)

for hh, df in people.groupby(hh_id_col, sort=False):
# skip households with only one person
if len(df) == 1:
if hh_size == 1:
continue

ptypes = df[p_type_col]
ptypes = df[p_type_col].values
hh_idx = df.index.values

for pA, pB in itertools.permutations(df.index, 2):
two.append((pA, two_fmt(*ptypes[[pA, pB]])))
if hh_size in two_perm_cache:
two_perms = two_perm_cache[hh_size]
else:
two_perms = list(itertools.permutations(np.arange(hh_size), 2))
two_perm_cache[hh_size] = two_perms

two.extend(
(hh_idx[pA], two_fmt(*ptypes[[pA, pB]])) for pA, pB in two_perms)

# now skip households with two people
if len(df) == 2:
if hh_size == 2:
continue

for idx in itertools.combinations(df.index, 3):
combo = three_fmt(*ptypes[list(idx)])
three.extend((p, combo) for p in idx)
if hh_size in three_combo_cache:
three_combos = three_combo_cache[hh_size]
else:
three_combos = list(itertools.combinations(np.arange(hh_size), 3))
three_combo_cache[hh_size] = three_combos

three.extend(
(hh_idx[p], three_fmt(*ptypes.take(idx)))
for idx in three_combos
for p in idx)

if two:
two_idx, two_val = zip(*two)
Expand Down

0 comments on commit 528c2ce

Please sign in to comment.