-
Notifications
You must be signed in to change notification settings - Fork 2
/
fundingutils.py
376 lines (286 loc) · 18.1 KB
/
fundingutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
import pandas as pd
from itertools import combinations
from math import log
from math import sqrt
from math import floor
import numpy as np
from functools import reduce
def add(x, y):
    """Return the result of ``x + y``.

    Works for any pair of operands that support the ``+`` operator.
    """
    combined = x + y
    return combined
#
#
# definitions for a *bunch* of variations of QF
#
# in all of these functions,
# - donation_df is expected to be a pandas dataframe where rows are wallets, columns are projects, and entries represent a wallet's total donation amount to a project
# - cluster_df is expected to be a pandas dataframe where rows are wallets, columns are clusters, and entries denote the strength of a user's membership in that cluster.
#
# also important to note: these functions all return the matching amounts each project should get under that variant of QF -- to get the full funding amount,
# you need to add in the direct donations as well!
#
# first, some helper functions
def binarize(df):
    """Return a 0/1 indicator copy of ``df``.

    Every entry strictly greater than 0 becomes 1; everything else
    (zero, negative, NaN) becomes 0. The input frame is not modified.

    Implemented as a single vectorised comparison rather than
    ``DataFrame.applymap`` with a per-cell lambda: ``applymap`` is deprecated
    in recent pandas releases and is far slower on large frames.
    """
    return (df > 0).astype(int)
def align(donation_df, cluster_df):
    """Restrict both frames to the same, identically ordered set of wallets.

    NOTE: both arguments are modified IN PLACE (rows are dropped and the
    index is sorted); the same two objects are returned for convenience.

    Steps:
      1. drop wallets whose row is entirely zero (no donations / no clusters),
      2. drop wallets that appear in only one of the two frames,
      3. sort both indices so that row i refers to the same wallet in both
         frames (required by the matrix products used by the callers).

    Returns:
        (donation_df, cluster_df) -- the mutated input frames.
    """
    # Step 1: vectorised all-zero-row detection (a row-wise Python lambda is
    # slow for many wallets and does not cope with frames that have no rows).
    empty_cluster_rows = cluster_df.index[(cluster_df == 0).all(axis=1).to_numpy()]
    cluster_df.drop(empty_cluster_rows, inplace=True)
    empty_donation_rows = donation_df.index[(donation_df == 0).all(axis=1).to_numpy()]
    donation_df.drop(empty_donation_rows, inplace=True)

    # Step 2: keep only wallets present in both frames. The second difference
    # is computed against the already-reduced cluster index, which yields the
    # intersection of the two wallet sets.
    cluster_df.drop(cluster_df.index.difference(donation_df.index), inplace=True)
    donation_df.drop(donation_df.index.difference(cluster_df.index), inplace=True)

    # Step 3: identical row order in both frames.
    cluster_df.sort_index(inplace=True)
    donation_df.sort_index(inplace=True)
    return donation_df, cluster_df
def check_matching_cap(col, matching_cap_percent):
    """Cap every share at ``matching_cap_percent`` and redistribute the excess.

    ``col`` is a pandas Series of shares; ``matching_cap_percent`` is compared
    against those values directly, so it must be expressed in the same unit.
    The input Series is copied, never modified.

    Each pass clips shares above the cap, then scales all shares still below
    the cap by a common factor so that they absorb the clipped excess. The
    passes repeat until no share exceeds the cap, or until nothing is left
    below the cap to receive the excess.
    """
    capped = col.copy()
    cap = matching_cap_percent
    finished = False
    while not finished:
        # Excess held by over-cap entries, measured before clipping.
        excess = np.maximum(0, capped - cap)
        capped.loc[capped > cap] = cap

        below_cap = capped < cap
        receivable = capped[below_cap].sum()
        if receivable > 0:
            # Proportional redistribution: every below-cap share grows by the
            # same relative amount.
            growth = excess.sum() / receivable
            capped.loc[below_cap] *= (1 + growth)
            # Another pass is needed only if the growth pushed something over.
            finished = not np.maximum(0, capped - cap).sum() > 0
        else:
            # Nobody left to receive the excess.
            finished = True
    return capped
def scale_matching(funding, matching_cap_percent, matching_amount):
    """Turn raw per-project funding into capped matching amounts.

    Args:
        funding: dict mapping project -> raw matching weight.
        matching_cap_percent: upper bound on any single project's share.
            It is passed unchanged to ``check_matching_cap`` and compared
            against shares that sum to 1, so despite its name it must be a
            fraction (e.g. 0.2 for 20%).
        matching_amount: size of the matching pool to distribute.

    Returns:
        DataFrame with columns ``project_name`` and ``matching_amount``.
    """
    projects = list(funding.keys())
    total_money = sum(funding.values())
    if total_money == 0:
        # No project earned any match: avoid 0/0 (NaN or ZeroDivisionError)
        # and report a zero share for everyone.
        funding_normalized = {p: 0.0 for p in projects}
    else:
        funding_normalized = {p: funding[p] / total_money for p in projects}

    # Create DataFrame with 'project_name' and 'matching_amount' columns
    result = pd.DataFrame(list(funding_normalized.items()), columns=['project_name', 'matching_amount'])
    # Apply the cap to the (still normalized) shares ...
    result['matching_amount'] = check_matching_cap(result['matching_amount'], matching_cap_percent)
    # ... then scale the shares by the size of the matching pool.
    result['matching_amount'] = result['matching_amount'] * matching_amount
    return result
# now on to the QF variants
def standard_qf(donation_df):
    """Standard quadratic funding match for every project.

    For each project column the match is
    ``(sum of sqrt(donation)) ** 2 - sum of donations``.

    Returns:
        dict mapping project -> matching amount (direct donations excluded).
    """
    funding = {}
    for project in donation_df.columns:
        contributions = donation_df[project]
        root_total = contributions.map(sqrt).sum()
        funding[project] = root_total ** 2 - contributions.sum()
    return funding
def pairwise(donation_df, M=0.01):
    """Pairwise-bounded quadratic funding match for every project.

    For every unordered pair of distinct donors (i, j) and every project p,
    the pair contributes ``sqrt(c_ip) * sqrt(c_jp) * k_ij`` where
    ``k_ij = M / (M + sum_q sqrt(c_iq) * sqrt(c_jq))`` is the pairwise
    coefficient described in the original pairwise matching blog post.

    Args:
        donation_df: wallets x projects frame of (non-negative) donations.
        M: the pairwise-matching constant; smaller values discount pairs of
            donors that overlap heavily more strongly.

    Returns:
        dict mapping project -> matching amount as a float (direct donations
        are NOT included).

    The sum over pairs is evaluated with matrix products instead of a Python
    loop over every donor pair: with S the matrix of square-rooted donations
    and K the coefficient matrix with a zeroed diagonal,
    ``sum_{i<j} S_ip * K_ij * S_jp == ((K @ S) * S).sum(axis=0)[p] / 2``.
    """
    projects = donation_df.columns
    roots = np.sqrt(donation_df.to_numpy(dtype=float))

    # Entry (i, j) is the dot product of i's and j's square-rooted donation vectors.
    overlap = roots @ roots.T
    denominator = M + overlap
    # A zero denominator can only occur when M == 0 and the pair shares no
    # project; such a pair contributes nothing, so its coefficient is set to 0
    # instead of dividing 0 by 0.
    k_matrix = np.divide(M, denominator, out=np.zeros_like(denominator), where=denominator != 0)
    # Only pairs of distinct donors are matched.
    np.fill_diagonal(k_matrix, 0.0)

    # The symmetric product counts every pair twice, hence the division by 2.
    totals = ((k_matrix @ roots) * roots).sum(axis=0) / 2
    return {p: float(t) for p, t in zip(projects, totals)}
def cluster_profile_pairwise(donation_df, cluster_df):
    """Pairwise matching where each pair of wallets is weighted by how
    different their cluster memberships are.

    The coefficient for wallets i and j is
        (# clusters only i is in + # clusters only j is in)
        / (# clusters i is in + # clusters j is in)
    computed on the binarized cluster frame. For every project, the products
    ``sqrt(c_i) * sqrt(c_j)`` over all ordered pairs of its donors are
    weighted by that coefficient and summed.

    ``donation_df`` is passed to ``align`` and is therefore filtered and
    sorted in place; ``cluster_df`` is binarized into a new frame first.

    Returns:
        dict mapping project -> matching amount (direct donations excluded).
    """
    cluster_df = binarize(cluster_df)
    donation_df, cluster_df = align(donation_df, cluster_df)

    wallets = donation_df.index
    project_names = donation_df.columns
    cluster_total = len(cluster_df.columns)

    # Numerator: start from the total number of clusters, remove the clusters
    # both wallets are in, then the clusters neither wallet is in. What is
    # left is the number of clusters exactly one of the two wallets is in.
    every_cluster = pd.DataFrame(index=wallets, columns=wallets, data=cluster_total)
    in_both = cluster_df.dot(cluster_df.transpose())
    in_neither = (1 - cluster_df).dot(1 - cluster_df.transpose())
    exclusive_counts = every_cluster - in_both - in_neither

    # Denominator: membership count of i plus membership count of j, built by
    # tiling the per-wallet counts into a square frame and adding its transpose.
    membership_counts = cluster_df.apply(sum, axis=1)
    tiled_counts = pd.DataFrame(index=wallets, columns=wallets, data=[membership_counts] * len(wallets))
    combined_counts = tiled_counts + tiled_counts.transpose()

    coeffs = exclusive_counts / combined_counts

    funding = dict.fromkeys(project_names, 0)
    for project in project_names:
        # Wallets with a zero donation take no part in this project's match.
        skipped = donation_df[donation_df[project] == 0].index
        root_column = pd.DataFrame(donation_df.drop(skipped, axis=0)[project].apply(sqrt))
        pair_products = root_column.dot(root_column.transpose())
        pair_coeffs = coeffs.drop(skipped, axis=1).drop(skipped, axis=0)
        funding[project] += (pair_products * pair_coeffs).sum().sum()
    return funding
def clustermatch(donation_df, cluster_df):
    """Cluster-match QF: apply the quadratic formula to clusters, not wallets.

    Each wallet's cluster weights are normalized to sum to 1, every donation
    is split across the wallet's clusters according to those weights, and the
    standard QF formula ``(sum of sqrt) ** 2 - sum`` is then applied to the
    per-cluster totals of each project.

    NOTE: ``donation_df`` is modified IN PLACE -- wallets missing from
    ``cluster_df`` are dropped and the index is sorted. ``cluster_df`` itself
    is not modified.

    Returns:
        dict mapping project -> matching amount (direct donations excluded).
    """
    projects = donation_df.columns
    donors = donation_df.index
    cluster_members = cluster_df.index

    # Normalize every row to sum to 1. Rows that are entirely zero stay zero:
    # their divisor is replaced by 1, which avoids 0/0. This is done in one
    # vectorised step; a row-wise apply that returns a Series for some rows and
    # a scalar for others does not reliably produce a DataFrame.
    empty_rows = (cluster_df == 0).all(axis=1)
    row_totals = cluster_df.sum(axis=1).where(~empty_rows, 1)
    normalized_clusters = cluster_df.div(row_totals, axis=0)

    # Keep only wallets present in both frames, in the same order, so that the
    # matrix product below lines up.
    donation_df.drop(list(set(donors) - set(cluster_members)), inplace=True)
    normalized_clusters.drop(list(set(cluster_members) - set(donors)), inplace=True)
    normalized_clusters.sort_index(inplace=True)
    donation_df.sort_index(inplace=True)

    # Rows are projects, columns are clusters, and entry (i, j) is cluster j's
    # donation to project i.
    B = donation_df.transpose().dot(normalized_clusters)
    funding = {p: B.loc[p].map(sqrt).sum() ** 2 - B.loc[p].sum() for p in projects}
    return funding
def donation_profile_clustermatch(donation_df):
    """Run cluster match using donation profiles as the clusters.

    Everyone who donated to exactly the same set of projects is put in the
    same cluster (i.e. under the same square root).

    ``donation_df`` is expected to have unique donors as rows, projects as
    columns, and entry (i, j) equal to donor i's total donation to project j.

    A profile is stored as a binary string over the project columns, in
    column order: with four projects, a donor who gave to projects 0, 1 and 3
    belongs to cluster "1101".
    """
    project_names = donation_df.columns

    def profile_of(row):
        # One character per project: '1' if the donor gave to it, else '0'.
        return ''.join('1' if row[p] > 0 else '0' for p in project_names)

    don_profiles = donation_df.apply(profile_of, axis=1)

    # One-hot membership frame: a column per distinct profile, and a single 1
    # per wallet in the column of its own profile.
    membership = pd.DataFrame(index=donation_df.index, columns=don_profiles.unique(), data=0)
    for wallet, profile in don_profiles.items():
        membership.loc[wallet, profile] = 1

    return clustermatch(donation_df, membership)
def COCM(donation_df, cluster_df, fancy=True):
    """Run CO-CM (connection-oriented cluster match) on donations and clusters.

    Args:
        donation_df: wallets x projects frame of donation amounts.
        cluster_df: wallets x clusters frame of membership strengths.
        fancy: if False, follow the formula in the whitepaper exactly; if
            True, use the graded "fraction of friends in the cluster"
            indicator described below.

    NOTE: both frames are passed to ``align`` and are therefore filtered and
    sorted IN PLACE (wallets with no donations, no clusters, or present in
    only one frame are dropped).

    Returns:
        dict mapping project -> matching amount (direct donations excluded).
    """
    donation_df, cluster_df = align(donation_df, cluster_df)
    projects = donation_df.columns

    # Normalize the cluster frame so that rows sum to 1: an entry is then the
    # "weight" a particular cluster has for a particular user. align() has
    # already removed every all-zero row, so no row divides 0 by 0.
    normalized_clusters = cluster_df.div(cluster_df.sum(axis=1), axis=0)
    binarized_clusters = binarize(cluster_df)

    if fancy:
        # friendship_matrix: wallets x wallets, True at (i, j) when wallets i
        # and j are in at least one cluster together.
        friendship_matrix = cluster_df.dot(cluster_df.transpose()).apply(lambda col: col > 0)

        # k_indicators: wallets x clusters.
        #  - if wallet i is not in cluster g, entry (i, g) is the fraction of
        #    i's friends who are in cluster g (i's friends are the wallets i
        #    shares a cluster with);
        #  - if wallet i is in cluster g, entry (i, g) is 1.
        # (Earlier versions used cluster_df instead of binarized_clusters in
        # the two statements below.)
        k_indicators = friendship_matrix.dot(binarized_clusters).apply(
            lambda row: row / friendship_matrix.loc[row.name].sum(), axis=1)
        k_indicators = k_indicators.apply(
            lambda row: np.maximum(row, binarized_clusters.loc[row.name]), axis=1)
    else:
        # friendship_matrix: wallets x wallets, a value greater than 0 at
        # (i, j) means wallets i and j are in at least one cluster together.
        friendship_matrix = cluster_df.dot(cluster_df.transpose())

        # k_indicators: wallets x clusters, True at (i, g) when wallet i is in
        # a shared cluster with anyone from g, False otherwise.
        k_indicators = friendship_matrix.dot(cluster_df).apply(lambda col: col > 0)

    funding = {p: 0 for p in projects}

    for p in projects:
        # Every wallet's contribution to this project, in row order. align()
        # guarantees that this order matches the rows of k_indicators.
        contributions = donation_df[p].to_numpy(dtype=float)

        # K: wallets x clusters. Entry (i, g) ranges between c_i and sqrt(c_i)
        # depending on i's relationship with cluster g (and on ``fancy``):
        # an indicator of 1 gives sqrt(c_i), an indicator of 0 gives c_i.
        K = (k_indicators.mul(np.sqrt(contributions), axis=0)
             + (1 - k_indicators).mul(contributions, axis=0))

        # P_prime combines the k values with the normalized memberships.
        # Entry (g, h) of P_prime is
        #
        #     sum_{i in h} K(i, g) / T_i
        #
        # where T_i is the total number of clusters that i is in.
        P_prime = K.transpose().dot(normalized_clusters)

        # P: the off-diagonal entry (g, h) is the pairwise subsidy given to
        # the pair of clusters g and h.
        P = (P_prime * P_prime.transpose()).pow(1/2)

        # The diagonal entries are not relevant: only subsidies between
        # distinct clusters count. Zero them on an explicit copy, because
        # ``P.values`` is not guaranteed to be a writable view of the frame.
        pair_subsidies = P.to_numpy(dtype=float, copy=True)
        np.fill_diagonal(pair_subsidies, 0)

        # The sum of the remaining entries is the subsidy COCM awards to the
        # project (NaNs are skipped, as pandas' own sum would).
        funding[p] += np.nansum(pair_subsidies)

    return funding
def standard_donation(donation_df):
    """Plain (non-quadratic) tally: total donations received by each project.

    Returns:
        dict mapping project -> sum of that project's column.
    """
    funding = {}
    for project in donation_df.columns:
        funding[project] = donation_df[project].sum()
    return funding
def apply_sliding_scale(votes_data, score_range=(15, 25), scale_range=(0.5, 1.0)):
    """Scale each vote's ``amountUSD`` according to the voter's ``score``.

    Args:
        votes_data: DataFrame with at least ``amountUSD`` and ``score``
            columns. It is modified IN PLACE and also returned.
        score_range: ``(low, high)`` scores between which the scaling factor
            is interpolated linearly.
        scale_range: ``(factor_at_low, factor_at_high)``.

    Behaviour:
        * score below ``low``        -> amount becomes 0
        * ``low`` <= score <= ``high`` -> amount * interpolated factor
        * score above ``high``       -> amount * ``factor_at_high``
          (``np.interp`` clamps outside the range; with the default factor of
          1.0 the full original amount is kept)

    The unscaled amounts are kept in ``starting_amountUSD``; if that column
    already exists the scaling restarts from it, so calling the function
    repeatedly does not compound the scaling.
    """
    lowest_score = score_range[0]

    # Snapshot the original amounts on the first call, restore them on later calls.
    if 'starting_amountUSD' not in votes_data.columns:
        votes_data['starting_amountUSD'] = votes_data['amountUSD']
    else:
        votes_data['amountUSD'] = votes_data['starting_amountUSD']

    factors = np.interp(votes_data['score'], score_range, scale_range)
    votes_data['amountUSD'] = votes_data['amountUSD'] * factors

    # np.interp would give below-range scores the lowest factor; they are
    # meant to be excluded entirely.
    votes_data.loc[votes_data['score'] < lowest_score, 'amountUSD'] = 0
    return votes_data
def flag_base_votes(votes, min_donation_threshold, score_threshold):
    """Add 0/1 flag columns describing each vote; ``votes`` is modified in place.

    Columns written:
        self_vote  -- 1 when ``voter`` equals ``payoutAddress``
        low_score  -- 1 when ``score`` is below ``score_threshold``
        low_amount -- 1 when ``amountUSD`` is below ``min_donation_threshold``
        base_vote  -- 1 only when all three flags above are 0

    Returns:
        the same ``votes`` frame.
    """
    is_self_vote = votes['voter'] == votes['payoutAddress']
    is_low_score = votes['score'] < score_threshold
    is_low_amount = votes['amountUSD'] < min_donation_threshold

    votes['self_vote'] = is_self_vote.astype(int)
    votes['low_score'] = is_low_score.astype(int)
    votes['low_amount'] = is_low_amount.astype(int)

    # A base vote raises none of the three flags.
    raises_any_flag = is_low_amount | is_low_score | is_self_vote
    votes['base_vote'] = (~raises_any_flag).astype(int)
    return votes
def prep_donations_data(votes_data, min_donation_threshold, score_threshold):
    """Flag votes and zero out the amount of every vote that is not a base vote.

    ``votes_data`` is modified in place and returned. The untouched amounts
    are kept in ``starting_amountUSD`` so the function can be re-run with
    different thresholds.

    The amounts are restored from ``starting_amountUSD`` BEFORE the votes are
    flagged. Flagging first would judge previously excluded votes on the 0
    written by an earlier call, so they would keep being flagged as
    ``low_amount`` even under thresholds that should admit them.

    NOTE: the sliding-scale weighting (``apply_sliding_scale``) is not applied
    here; the call was commented out in the original implementation.
    """
    has_snapshot = 'starting_amountUSD' in votes_data.columns
    if has_snapshot:
        # Re-run: start again from the original amounts.
        votes_data['amountUSD'] = votes_data['starting_amountUSD']

    votes_data = flag_base_votes(votes_data, min_donation_threshold, score_threshold)

    if not has_snapshot:
        # First run: amounts are still untouched here, remember them.
        votes_data['starting_amountUSD'] = votes_data['amountUSD']

    votes_data.loc[votes_data['base_vote'] == 0, 'amountUSD'] = 0
    return votes_data
def pivot_votes(round_votes):
    """Pivot a long table of votes into a voters x projects donation matrix.

    Args:
        round_votes: DataFrame with ``voter``, ``project_name`` and
            ``amountUSD`` columns (one row per vote).

    Returns:
        DataFrame indexed by voter with one column per project. Each entry is
        the voter's TOTAL ``amountUSD`` for that project, 0 where the voter
        did not donate.

    ``aggfunc='sum'`` is passed explicitly: ``pivot_table`` defaults to the
    mean, which would average repeated donations by one voter to one project
    instead of adding them up.
    """
    donation_matrix = round_votes.pivot_table(
        index='voter',
        columns='project_name',
        values='amountUSD',
        aggfunc='sum',
        fill_value=0,
    )
    return donation_matrix
def get_qf_matching(algo, donation_df, matching_cap_percent, matching_amount, cluster_df = None):
    """Compute capped matching amounts for every project with the chosen algorithm.

    Args:
        algo: ``'donation_profile_clustermatch'``, ``'COCM'`` or
            ``'pairwise'``; any other value selects standard QF.
        donation_df: wallets x projects frame of donation amounts. Some
            algorithms filter/sort this frame in place.
        matching_cap_percent: maximum share of the pool a single project may
            receive, in percent (0-100). The cap is only applied when the
            value is below 100.
        matching_amount: size of the matching pool.
        cluster_df: wallets x clusters membership frame; required for
            ``'COCM'`` and ignored by the other algorithms.

    Returns:
        DataFrame with columns ``project_name``, ``matching_amount`` (share of
        the pool, in currency) and ``matching_percent`` (share in percent).

    Raises:
        ValueError: if ``algo`` is ``'COCM'`` and ``cluster_df`` is None.
    """
    projects = donation_df.columns

    if algo == 'donation_profile_clustermatch':
        funding = donation_profile_clustermatch(donation_df)
    elif algo == 'COCM':
        if cluster_df is None:
            # Fail with a clear message instead of an AttributeError deep inside COCM.
            raise ValueError("cluster_df is required when algo is 'COCM'")
        funding = COCM(donation_df, cluster_df)
    elif algo == 'pairwise':
        funding = pairwise(donation_df)
    else:
        funding = standard_qf(donation_df)

    total_money = sum(funding.values())
    if total_money == 0:
        # No project earned any match: avoid 0/0 (NaN or ZeroDivisionError)
        # and report a zero share for everyone.
        funding_normalized = {p: 0.0 for p in projects}
    else:
        funding_normalized = {p: funding[p] / total_money for p in projects}

    # Create DataFrame with 'project_name' and 'matching_amount' columns
    result = pd.DataFrame(list(funding_normalized.items()), columns=['project_name', 'matching_amount'])

    # Apply the cap to the normalized shares (the cap is given in percent,
    # the shares are fractions).
    if matching_cap_percent < 100.0:
        result['matching_amount'] = check_matching_cap(result['matching_amount'], matching_cap_percent/100)

    # Report the share in percent, then scale it by the size of the pool.
    result['matching_percent'] = result['matching_amount'] * 100
    result['matching_amount'] = result['matching_amount'] * matching_amount
    return result