-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathEvaluationMetrics.py
68 lines (51 loc) · 2.78 KB
/
EvaluationMetrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#########################################################################################
# EvaluationMetrics.py
# Purpose: script contains methods for evaluating link prediction algorithm performance
# version 1.0
# date: 01.28.2017
#########################################################################################
# import module/script dependencies
import random
import heapq
def AUC(nonexist_scores, missing_scores):
'''
Function calculates the probability that a randomly chosen missing link is given a higher score than a randomly
chosen nonexistent link using procedures described by Lu & Zhou (2010)
(doi:http://dx.doi.org/10.1016/j.physa.2010.11.027T). The function takes a list containing missing, non-existent
edges, and predictions, and returns an AUC score. Current method is currently set to perform 1,000 comparisons.
:param nonexist_scores: list of nonexistent edges and scores
:param missing_scores: list of test edges and scores
:return: an integer which is the AUC score for that comparison
'''
# comparisons = len(nonexist_scores)*len(missing_scores) #HOW MANY??
comparisons = 1000
count = 0.0
for i in xrange(comparisons):
TN = random.sample(nonexist_scores.values(), 1)
TP = random.sample(missing_scores.values(), 1)
if TP > TN:
count += 1.0
if TP == TN:
count += 0.5
auc = count/comparisons
return auc
def KPrecision(auc, scores, testing_edges):
'''
Function calculates the ratio of relevant items selected from the top n items using procedures described by Lu &
Zhou (2010) (doi:http://dx.doi.org/10.1016/j.physa.2010.11.027T). The function takes a list containing missing and
non-existent edges, the list of all predictions, and returns a top k-precision score for 20% of scores.
:param auc: integer representing AUC score - to indicate whether the top or bottom of list should be assessed
:param scores: list of test edges and scores
:param testing_edges: list of nonexistent edges and scores
:return: an integer which is the precision for the top number of selected links
'''
#get 20% of edges
links = [int(len(scores)*0.20) if int(len(scores)*0.20) >= 1 else 1][0]
if auc < 0.5:
# pred_list = heapq.nsmallest(links, set([x[1] for x in scores.items()])) #returns n highest likelihood scores
pred_list = heapq.nsmallest(links, scores, key=lambda k: scores[k])
else:
# pred_list = heapq.nlargest(links, set([x[1] for x in scores.items()])) #returns n highest likelihood scores
pred_list = heapq.nlargest(links, scores, key=lambda k: scores[k])
y_pred = [0 if (i[0], i[1]) in testing_edges else 0 if (i[1], i[0]) in testing_edges else 1 for i in pred_list]
return float(y_pred.count(0))/links