-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmetrics.py
507 lines (378 loc) · 16.3 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
"""
Appointment Metrics
-------------------
Functions to analyze the results of appointments, allocations and other political science scenarios.
Based on
Flynn, C. voting: Diversity / (dis)proportionality measures, election quotas, and apportionment methods in pure
Python. (2020).
URL: https://github.com/crflynn/voting
License: https://github.com/crflynn/voting/blob/master/LICENSE.txt
Kohler, U., and Zeh, J. (2012). “Apportionment methods”.
The Stata Journal, Vol. 12, No. 3, pp. 375–392.
URL: https://journals.sagepub.com/doi/pdf/10.1177/1536867X1201200303
Karpov, A. (2008). "Measurement of disproportionality in proportional representation systems".
Mathematical and Computer Modelling, Vol. 48, 1421-1438.
URL: https://www.sciencedirect.com/science/article/pii/S0895717708001933
Taagepera, R., Grofman, B. (2003). "Mapping the Indices of Seats-Votes Disproportionality and Inter-Election Volatility". Party Politics, Vol. 9, No. 6, pp. 659–677.
URL: https://escholarship.org/content/qt0m9912ff/qt0m9912ff.pdf.
Contents:
ideal_share,
alloc_to_share_ratio,
sqr_alloc_to_share_error,
total_alloc_to_share_error,
rep_weight,
sqr_rep_weight_error,
total_rep_weight_error,
div_index,
effective_number_of_groups,
dispr_index
"""
from math import exp, log, sqrt
from poli_sci_kit.utils import normalize
from scipy.stats import linregress
def ideal_share(share, total_shares, total_alloc):
    """
    Compute the exactly proportional allocation for a single region or group.

    Parameters
    ----------
        share : int
            The shares held by the region or group being checked.

        total_shares : int
            The total amount of shares.

        total_alloc : int
            The number of allocations to provide.

    Returns
    -------
        ideal : float
            The group's fraction of all shares multiplied by the number of allocations.
    """
    # The leading 1.0 forces floating point arithmetic before the division.
    fraction_of_total = 1.0 * share / total_shares
    return fraction_of_total * total_alloc
def alloc_to_share_ratio(share, total_shares, allocation, total_alloc):
    """
    Compute the allocation to share (advantage) ratio of a region or group.

    Parameters
    ----------
        share : int
            The shares held by the region or group being checked.

        total_shares : int
            The total amount of shares.

        allocation : int
            The allocations given to the region or group.

        total_alloc : int
            The number of allocations to provide.

    Returns
    -------
        asr : float
            The group's fraction of all allocations divided by its fraction of all shares.
    """
    alloc_fraction = allocation / total_alloc
    share_fraction = share / total_shares

    return 1.0 * alloc_fraction / share_fraction
def sqr_alloc_to_share_error(share, total_shares, allocation, total_alloc):
    """
    Compute the squared deviation of a group's allocation to share ratio from one.

    Parameters
    ----------
        share : int
            The shares held by the region or group being checked.

        total_shares : int
            The total amount of shares.

        allocation : int
            The allocations given to the region or group.

        total_alloc : int
            The number of allocations to provide.

    Returns
    -------
        sqr_asr_err : float
            The square of (allocation to share ratio - 1).
    """
    ratio_args = {
        "share": share,
        "total_shares": total_shares,
        "allocation": allocation,
        "total_alloc": total_alloc,
    }
    deviation = alloc_to_share_ratio(**ratio_args) - 1

    return deviation ** 2
def total_alloc_to_share_error(shares, allocations, proportional=True):
    """
    Sum the squared allocation to share ratio errors over all regions or groups.

    Parameters
    ----------
        shares : list
            The original shares of the regions or groups.

        allocations : list
            The allocations given to the regions or groups.

        proportional : bool (default=True)
            Whether each group's squared error is weighted by its fraction of the total shares.

    Returns
    -------
        total_asr_err : float
            The (optionally share-weighted) sum of the squared allocation to share ratio errors.
    """
    assert len(shares) == len(
        allocations
    ), "The total different shares of a population or vote must equal that of the allocations."

    share_total = sum(shares)
    alloc_total = sum(allocations)

    squared_errors = [
        sqr_alloc_to_share_error(
            share=group_share,
            total_shares=share_total,
            allocation=group_alloc,
            total_alloc=alloc_total,
        )
        for group_share, group_alloc in zip(shares, allocations)
    ]

    if not proportional:
        return sum(squared_errors)

    # Weight every group's error by the fraction of all shares it holds.
    return sum(
        group_share / share_total * error
        for group_share, error in zip(shares, squared_errors)
    )
def rep_weight(share, allocation):
    """
    Compute the representative weight of a region or group: its shares per allocation.

    Parameters
    ----------
        share : int
            The shares held by the region or group being checked.

        allocation : int
            The allocations given to the region or group.

    Returns
    -------
        rep_weight : float
            The number of shares per allocation.
    """
    shares_per_allocation = share / allocation
    return shares_per_allocation
def sqr_rep_weight_error(share, total_shares, allocation, total_alloc):
    """
    Compute the squared deviation of a group's representative weight from the overall one.

    Parameters
    ----------
        share : int
            The shares held by the region or group being checked.

        total_shares : int
            The total amount of shares.

        allocation : int
            The allocations given to the region or group.

        total_alloc : int
            The number of allocations to provide.

    Returns
    -------
        sqr_rw_err : float
            The square of the difference between the group's shares per allocation and
            the total shares per total allocation.
    """
    group_weight = rep_weight(share=share, allocation=allocation)
    overall_weight = total_shares / total_alloc
    deviation = group_weight - overall_weight

    return deviation ** 2
def total_rep_weight_error(shares, allocations, proportional=True):
    """
    Sum the squared representative weight errors over all regions or groups.

    Parameters
    ----------
        shares : list
            The original shares of the regions or groups.

        allocations : list
            The allocations given to the regions or groups.

        proportional : bool (default=True)
            Whether each group's squared error is weighted by its fraction of the total shares.

    Returns
    -------
        total_rw_err : float
            The (optionally share-weighted) sum of the squared representative weight errors.
    """
    assert len(shares) == len(
        allocations
    ), "The total different shares of a population or vote must equal that of the allocations."

    share_total = sum(shares)
    alloc_total = sum(allocations)

    squared_errors = [
        sqr_rep_weight_error(
            share=group_share,
            total_shares=share_total,
            allocation=group_alloc,
            total_alloc=alloc_total,
        )
        for group_share, group_alloc in zip(shares, allocations)
    ]

    if not proportional:
        return sum(squared_errors)

    # Weight every group's error by the fraction of all shares it holds.
    return sum(
        group_share / share_total * error
        for group_share, error in zip(shares, squared_errors)
    )
def div_index(shares, q=None, metric_type="Shannon"):
    """
    Calculates the diversity index: the uncertainty associated with predicting further elements within the vote or population distributions.

    Parameters
    ----------
        shares : list
            The original shares of the regions or groups (they are normalized before use).

        q : float (default=None)
            The order of diversity (a weight value for the sensitivity of the diversity value to rare vs. abundant).

            Required for the Renyi and Effective metrics and ignored by the others.

        metric_type : str (default=Shannon)
            The type of formula to use.

            Options:
                The available measures of diversity.

                - Shannon : zero when all shares are concentrated in one group, and larger the more evenly they are dispersed.

                - Renyi : generalization of the Shannon diversity (equal to it in the limit q = 1).

                - Simpson : probability that two entities taken at random from the dataset of interest represent the same type (assumes replacement).

                - Gini-Simpson : opposite of the Simpson diversity, the probability that two entities are from different types.

                - Berger-Parker : proportional abundance of the most abundant type.

                - Effective : number of equally abundant types needed for the average proportional abundance of types to equal that of the dataset.

    Returns
    -------
        index : float
            The measure of diversity given the share distribution.

    Raises
    ------
        AssertionError
            If q is not provided for the Renyi or Effective metrics.

        ValueError
            If metric_type is not one of the options above.
    """
    norm_shares = list(normalize(vals=shares))

    # Groups without any shares contribute nothing to the entropy and power
    # sums (0 * log(0) is taken as 0), and leaving them in would make log()
    # or a non-positive power fail.
    present_shares = [share for share in norm_shares if share != 0]

    if metric_type == "Shannon":
        index = -1 * sum(share * log(share) for share in present_shares)

    elif metric_type == "Renyi":
        # 'is not None' rather than truthiness so that the valid order q=0 is accepted.
        assert (
            q is not None
        ), "The order of diversity 'q' argument must be used with Renyi diversity calculations."

        if q == 1:
            # 1 / (1 - q) is undefined here; the limit of the Renyi entropy
            # as q approaches 1 is the Shannon entropy.
            index = -1 * sum(share * log(share) for share in present_shares)

        else:
            index = 1.0 / (1 - q) * log(sum(share ** q for share in present_shares))

    elif metric_type == "Simpson":
        index = sum(share ** 2 for share in norm_shares)

    elif metric_type == "Gini-Simpson":
        index = 1 - sum(share ** 2 for share in norm_shares)

    elif metric_type == "Berger-Parker":
        index = max(norm_shares)

    elif metric_type == "Effective":
        # 'is not None' rather than truthiness so that the valid order q=0 is accepted.
        assert (
            q is not None
        ), "The order of diversity 'q' argument must be used with Effective diversity calculations."

        if q == 1:
            # The general formula is undefined at q=1; its limit is the
            # exponential of the Shannon entropy.
            index = exp(-1 * sum(share * log(share) for share in present_shares))

        else:
            index = sum(share ** q for share in present_shares) ** (1.0 / (1 - q))

    else:
        raise ValueError(
            f"{metric_type} is not a valid value for the 'metric_type' argument."
        )

    return index
def effective_number_of_groups(shares, metric_type="Laakso-Taagepera"):
    """
    Calculates the effective number of groups given vote or population distributions.

    Parameters
    ----------
        shares : list
            The original shares of the regions or groups (they are normalized before use).

        metric_type : str (default=Laakso-Taagepera, options=Golosov, Inverse-Simpson)
            The type of formula to use.

            - Laakso-Taagepera : one over the sum of the squared normalized shares.

            - Golosov : sum over groups of p / (p + p_max^2 - p^2), with p the normalized share and p_max the largest normalized share.

            - Inverse-Simpson : one over the Simpson diversity index.

    Returns
    -------
        num_groups : float
            A float representing the effective number of groups given the share distributions.

    Raises
    ------
        ValueError
            If metric_type is not one of the options above.
    """
    norm_shares = list(normalize(vals=shares))

    if metric_type == "Laakso-Taagepera":
        return 1.0 / sum(share ** 2 for share in norm_shares)

    elif metric_type == "Golosov":
        # The largest share must come from the normalized values so that it
        # is on the same scale as the shares it is combined with below.
        max_share = max(norm_shares)

        return sum(
            share / (share + max_share ** 2 - share ** 2) for share in norm_shares
        )

    elif metric_type == "Inverse-Simpson":
        # The inverse of the Simpson index (not of the Shannon index).
        return 1.0 / div_index(shares=shares, metric_type="Simpson")

    # Fail loudly instead of silently returning None for an unknown metric.
    raise ValueError(
        f"{metric_type} is not a valid value for the 'metric_type' argument."
    )
def dispr_index(shares, allocations, metric_type="Gallagher"):
    """
    Measures of the degree to which the actual allocations deviates from the shares, with larger indexes implying greater disproportionality.

    Parameters
    ----------
        shares : list
            The original shares of the regions or groups (they are normalized before use).

        allocations : list
            The allocations given to the regions or groups (they are normalized before use).

        metric_type : str (default=Gallagher)
            The type of formula to use.

            ASCII spellings such as 'Loosemore-Hanby', 'Sainte-Lague', 'dHondt' and "d'Hondt" are accepted as well.

            Options:
                The available measures of disproportionality.

                - Gallagher : measure of absolute difference in percent of allocations received to true proportion.
                    Note 1: accounts for magnitudes of the individual shifts.

                    Note 2: deals with the magnitudes of the disproportionality, not the percentage differences from ideality.

                    Note 3: a general form with k instead of the square root, 1/2 and second power is not monotone to k, and is thus not included.

                - Loosemore–Hanby : the total excess of allocated shares of overrepresented groups over the exact quota and the total shortage accruing to other groups.
                    Note 1: is not consistent (it fails Dalton's principle of transfers, where transferring shares may lead to adverse effects on allocations).

                    Note 2: does not account for the magnitude of individual disproportionality (that few large shifts should potentially be worse than many small).

                - Rose : 100 minus the Loosemore–Hanby index, so in this case larger numbers are better (suffers from similar issues).

                - Rae : measure of the average absolute difference in percent of allocations received to true proportion.
                    Note 1: includes the number of groups in the calculation, and thus is affected if there are many small groups.

                    Note 2: don't use to compare appointments across situations with different numbers of groups.

                - Sainte-Laguë (chi-squared) : measure of relative difference in percent of allocations received to true proportion.
                    Note 1: has no upper limit.

                    Note 2: downplays the disproportionality that affects larger groups.

                    Note 3: sensitive to a large portion of the shares being 'other' and not receiving allocations.

                - d’Hondt : the largest allocation to share ratio over all groups.
                    Note: does not account for the magnitude of individual disproportionality (that few large shifts should be worse than many small).

                - Cox-Shugart : the slope of the line of best fit between the normalized shares and normalized allocations.
                    Note 1: main advantage is directly showing whether larger or smaller groups are benefitting (>1 or <1 respectively).

                    Note 2: this index can be negative, and if it is, that implies a negative shares-allocations ratio.

            Note: the Gini index as a measure of disproportionality is not included, as in many cases smaller groups have a greater allocation proportion.

    Returns
    -------
        index : float
            A measure of disproportionality between allocations and original shares.

    Raises
    ------
        AssertionError
            If shares and allocations differ in length, or metric_type is not an available option.
    """
    assert len(shares) == len(
        allocations
    ), "The number of different shares must equal the number of different allocations."

    # Only metrics that are actually implemented below are listed: 'Gini' is
    # deliberately not offered (see the docstring).
    available_metrics = [
        "Gallagher",
        "Loosemore–Hanby",
        "Rose",
        "Rae",
        "Sainte-Laguë",
        "d’Hondt",
        "Cox-Shugart",
    ]

    # Alternative spellings mapped onto the canonical names above, so that
    # they pass validation and reach their branch.
    metric_aliases = {
        "Loosemore-Hanby": "Loosemore–Hanby",
        "Sainte-Lague": "Sainte-Laguë",
        "dHondt": "d’Hondt",
        "dhondt": "d’Hondt",
        "d’hondt": "d’Hondt",
        "d'Hondt": "d’Hondt",
        "d'hondt": "d’Hondt",
    }
    metric_type = metric_aliases.get(metric_type, metric_type)

    assert metric_type in available_metrics, (
        f"{metric_type} is not a valid value for the 'metric_type' argument. Please choose from the following options: "
        + ", ".join(available_metrics)
        + "."
    )

    norm_shares = normalize(vals=shares)
    norm_allocations = normalize(vals=allocations)

    if metric_type == "Gallagher":
        index = sqrt(1.0 / 2) * sqrt(
            sum(
                (share - allocation) ** 2
                for share, allocation in zip(norm_shares, norm_allocations)
            )
        )

    elif metric_type == "Loosemore–Hanby":
        index = (
            1.0
            / 2
            * sum(
                abs(share - allocation)
                for share, allocation in zip(norm_shares, norm_allocations)
            )
        )

    elif metric_type == "Rose":
        # NOTE(review): the Loosemore–Hanby value is computed from normalized
        # values, so subtracting it from 100 looks like a scale mismatch;
        # kept as documented for backward compatibility - confirm.
        index = 100 - dispr_index(
            shares=shares, allocations=allocations, metric_type="Loosemore–Hanby"
        )

    elif metric_type == "Rae":
        index = (
            1.0
            / len(norm_shares)
            * sum(
                abs(share - allocation)
                for share, allocation in zip(norm_shares, norm_allocations)
            )
        )

    elif metric_type == "Sainte-Laguë":
        index = sum(
            1.0 / share * (share - allocation) ** 2
            for share, allocation in zip(norm_shares, norm_allocations)
        )

    elif metric_type == "d’Hondt":
        index = max(
            1.0 * allocation / share
            for share, allocation in zip(norm_shares, norm_allocations)
        )

    elif metric_type == "Cox-Shugart":
        # Regress on the normalized values: only then is the slope comparable
        # to 1, as the documented interpretation requires.
        index = linregress(norm_shares, norm_allocations)[0]

    return index