-
Notifications
You must be signed in to change notification settings - Fork 2
/
perception_utils.py
71 lines (41 loc) · 2.07 KB
/
perception_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import pandas as pd
from sklearn.linear_model import LinearRegression
from convokit import Corpus, Utterance, Speaker
# scale scores from 1-25 range to -3 - 3 range
def scale_func(val, a = 0.25, b = -3.25):
return a*val + b
def get_strategies_df(corpus, strategy_attribute_name):
strategies_dict = {utt.id: utt.meta[strategy_attribute_name] for utt in corpus.iter_utterances()}
df = pd.DataFrame.from_dict(strategies_dict, orient="index")
# return df with alphabetically ordered feature name
return df.sort_index(axis=1)
def train_model(X, y):
return LinearRegression().fit(X, y)
def get_model_info(df, y):
lr_model = train_model(df.values, y)
coefs = dict(zip(df.keys(), lr_model.coef_))
intercept = lr_model.intercept_
return {"coefs": coefs, "intercept": intercept}
# train individualized model
# using coefs from average model for strategies that are under-annotated
def get_ind_model_info(df_feat, scores, avg_coefs, min_cnt = 15):
feat_counts = dict(df_feat.sum(axis=0))
good_feats = [k for k,v in feat_counts.items() if v > min_cnt]
# if certain strategies have too little annotations from the individual
# we consider directly using the "average-person" view
low_count_feats = {k for k,v in feat_counts.items() if v <= min_cnt}
# adjust scores for low counts
for feat in low_count_feats:
feat_ids = df_feat[df_feat[feat]==1].index
for idx in feat_ids:
# embed mean coef into "score" (i.e., fixing coefs for these low-count features)
scores[idx] -= avg_coefs[feat]
df_feat_sel = df_feat[good_feats]
df = df_feat_sel
y = [scores[idx] for idx in df_feat_sel.index]
ind_model = get_model_info(df, y)
ind_model['coefs'].update({feat: avg_coefs[feat] for feat in low_count_feats})
return ind_model
# estimate the level of politeness according to a perception model
def estimate_perception(model, strategy_set):
return sum([model['coefs'][s] for s in strategy_set]) + model['intercept']