From 528c2ced252cb3af49d992a92f24da1ca56462cf Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 5 May 2015 11:44:40 -0700 Subject: [PATCH] use numpy indexing in cdap.make_interactions Once again we get significant speedups by working with NumPy arrays instead of Pandas structures. --- activitysim/cdap/cdap.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/activitysim/cdap/cdap.py b/activitysim/cdap/cdap.py index f1da8836a..be8232e6c 100644 --- a/activitysim/cdap/cdap.py +++ b/activitysim/cdap/cdap.py @@ -48,24 +48,42 @@ def make_interactions(people, hh_id_col, p_type_col): three_fmt = '{}{}{}'.format two = [] three = [] + two_perm_cache = {} + three_combo_cache = {} + + for hh_id, df in people.groupby(hh_id_col, sort=False): + hh_size = len(df) - for hh, df in people.groupby(hh_id_col, sort=False): # skip households with only one person - if len(df) == 1: + if hh_size == 1: continue - ptypes = df[p_type_col] + ptypes = df[p_type_col].values + hh_idx = df.index.values - for pA, pB in itertools.permutations(df.index, 2): - two.append((pA, two_fmt(*ptypes[[pA, pB]]))) + if hh_size in two_perm_cache: + two_perms = two_perm_cache[hh_size] + else: + two_perms = list(itertools.permutations(np.arange(hh_size), 2)) + two_perm_cache[hh_size] = two_perms + + two.extend( + (hh_idx[pA], two_fmt(*ptypes[[pA, pB]])) for pA, pB in two_perms) # now skip households with two people - if len(df) == 2: + if hh_size == 2: continue - for idx in itertools.combinations(df.index, 3): - combo = three_fmt(*ptypes[list(idx)]) - three.extend((p, combo) for p in idx) + if hh_size in three_combo_cache: + three_combos = three_combo_cache[hh_size] + else: + three_combos = list(itertools.combinations(np.arange(hh_size), 3)) + three_combo_cache[hh_size] = three_combos + + three.extend( + (hh_idx[p], three_fmt(*ptypes.take(idx))) + for idx in three_combos + for p in idx) if two: two_idx, two_val = zip(*two)