compute_metrics.py
import os
import argparse
import torch
import json
import evaluation
import numpy as np

from models.clip import clip
from evaluation import PACScore, RefPACScore
from models import open_clip

_MODELS = {
    "ViT-B/32": "checkpoints/clip_ViT-B-32.pth",
    "open_clip_ViT-L/14": "checkpoints/openClip_ViT-L-14.pth"
}
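# Note: these paths are assumed to point at the scorer checkpoints loaded via
# torch.load in the __main__ block below; adjust them if your checkpoints are
# stored elsewhere.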

def compute_scores(model, preprocess, image_ids, candidates, references, args):
    gen = {}
    gts = {}

    ims_cs = list()
    gen_cs = list()
    gts_cs = list()
    all_scores = dict()
    model.eval()

    # Collect image paths, candidate captions and reference captions,
    # indexed consistently for the metrics computed below.
    for i, (im_i, gts_i, gen_i) in enumerate(zip(image_ids, references, candidates)):
        gen['%d' % (i)] = [gen_i, ]
        gts['%d' % (i)] = gts_i
        ims_cs.append(im_i)
        gen_cs.append(gen_i)
        gts_cs.append(gts_i)

    # Standard reference-based captioning metrics on PTB-tokenized captions
    gts = evaluation.PTBTokenizer.tokenize(gts)
    gen = evaluation.PTBTokenizer.tokenize(gen)
    all_scores_metrics = evaluation.get_all_metrics(gts, gen)
    for k, v in all_scores_metrics.items():
        if k == 'BLEU':
            all_scores['BLEU-1'] = v[0]
            all_scores['BLEU-4'] = v[-1]
        else:
            all_scores[k] = v

    # PAC-S: image-candidate score
    _, pac_scores, candidate_feats, len_candidates = PACScore(
        model, preprocess, ims_cs, gen_cs, device, w=2.0)
    all_scores['PAC-S'] = np.mean(pac_scores)

    # RefPAC-S: harmonic mean of PAC-S and the candidate-reference text score
    if args.compute_refpac:
        _, per_instance_text_text = RefPACScore(
            model, gts_cs, candidate_feats, device, torch.tensor(len_candidates))
        refpac_scores = 2 * pac_scores * per_instance_text_text / \
            (pac_scores + per_instance_text_text)
        all_scores['RefPAC-S'] = np.mean(refpac_scores)

    return all_scores

if __name__ == '__main__':
    # Argument parsing
    parser = argparse.ArgumentParser(description='PAC-S evaluation')
    parser.add_argument('--clip_model', type=str, default='ViT-B/32',
                        choices=['ViT-B/32', 'open_clip_ViT-L/14'])
    parser.add_argument('--image_dir', type=str, default='example/images')
    parser.add_argument('--candidates_json', type=str,
                        default='example/good_captions.json')
    parser.add_argument('--references_json', type=str, default='example/refs.json')
    parser.add_argument('--compute_refpac', action='store_true')
    args = parser.parse_args()
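    # Example invocation (relies on the example data referenced by the defaults above):
    #   python compute_metrics.py --clip_model ViT-B/32 --compute_refpac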

    device = "cuda" if torch.cuda.is_available() else "cpu"

    image_ids = [img_id for img_id in os.listdir(args.image_dir)]

    with open(args.candidates_json) as f:
        candidates = json.load(f)
    candidates = [candidates[cid.split('.')[0]] for cid in image_ids]

    with open(args.references_json) as f:
        references = json.load(f)
    references = [references[cid.split('.')[0]] for cid in image_ids]
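    # Assumed JSON layout (inferred from the indexing above): both files are
    # keyed by the image filename without extension; candidates_json maps each
    # key to a single caption string, references_json maps each key to a list
    # of reference caption strings.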
    image_ids = [os.path.join(args.image_dir, img_id) for img_id in image_ids]

    if args.clip_model.startswith('open_clip'):
        print("Using Open CLIP Model: " + args.clip_model)
        model, _, preprocess = open_clip.create_model_and_transforms(
            'ViT-L-14', pretrained='laion2b_s32b_b82k')
    else:
        print("Using CLIP Model: " + args.clip_model)
        model, preprocess = clip.load(args.clip_model, device=device)

    model = model.to(device)
    model = model.float()
    checkpoint = torch.load(_MODELS[args.clip_model])
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    all_scores = compute_scores(
        model, preprocess, image_ids, candidates, references, args)

    for k, v in all_scores.items():
        print('%s: %.4f' % (k, v))
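    # Expected console output: one "<metric>: <value>" line per metric, e.g.
    #   BLEU-1: 0.xxxx
    #   BLEU-4: 0.xxxx
    #   PAC-S: 0.xxxx
    #   RefPAC-S: 0.xxxx   (only when --compute_refpac is passed)
    # plus whatever other metrics evaluation.get_all_metrics returns.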