grit
Help with the evaluation
Hi, thank you for sharing this great work. I'm trying to test the effects of training, but I found that some of the evaluation code for Table 7 is missing, such as for the 'Object', 'Attr.', 'Relation', 'Color', 'Count', 'Size', and 'CLIP' columns. How do I get it?
Thanks for asking! You may find the evaluation code/notebook in the M2 Transformer repo; I remember that I took the code from there.
Sorry, I browsed the M2 Transformer code, but I cannot find what I need. It only has BLEU, CIDEr, METEOR, and ROUGE.
Sorry for my bad memory. Luckily, I've just checked the old code and found that the evaluation is based on this repo, not M2 Transformer: https://github.com/salaniz/pycocoevalcap.
To get the 'Object', 'Attr.', 'Relation', 'Color', 'Count', and 'Size' scores, you need to modify https://github.com/salaniz/pycocoevalcap/blob/master/spice/spice.py as follows (note that SPICE reports the paper's 'Count' column under the key 'Cardinality'):
```python
from __future__ import division
import json
import math
import os
import subprocess
import tempfile

import numpy as np

# Assumes spice.jar is in the same directory as spice.py. Change as needed.
SPICE_JAR = 'spice-1.0.jar'
TEMP_DIR = 'tmp'
CACHE_DIR = 'cache'


class Spice:
    """Main class to compute the SPICE metric."""

    def float_convert(self, obj):
        try:
            return float(obj)
        except (TypeError, ValueError):
            return np.nan

    def compute_score(self, gts, res):
        assert sorted(gts.keys()) == sorted(res.keys())
        imgIds = sorted(gts.keys())

        # Prepare temp input file for the SPICE scorer.
        input_data = []
        for id in imgIds:
            hypo = res[id]
            ref = gts[id]

            # Sanity check.
            assert type(hypo) is list
            assert len(hypo) == 1
            assert type(ref) is list
            assert len(ref) >= 1

            input_data.append({"image_id": id, "test": hypo[0], "refs": ref})

        cwd = os.path.dirname(os.path.abspath(__file__))
        temp_dir = os.path.join(cwd, TEMP_DIR)
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        in_file = tempfile.NamedTemporaryFile(mode='w+', delete=False, dir=temp_dir)
        json.dump(input_data, in_file, indent=2)
        in_file.close()

        # Start job.
        out_file = tempfile.NamedTemporaryFile(mode='w+', delete=False, dir=temp_dir)
        out_file.close()
        cache_dir = os.path.join(cwd, CACHE_DIR)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        # '-subset' makes the jar emit per-category scores alongside 'All'.
        spice_cmd = [
            'java', '-jar', '-Xmx8G', SPICE_JAR, in_file.name,
            '-cache', cache_dir, '-out', out_file.name, '-subset', '-silent'
        ]
        subprocess.check_call(spice_cmd, cwd=cwd)

        # Read and process results.
        with open(out_file.name) as data_file:
            results = json.load(data_file)
        os.remove(in_file.name)
        os.remove(out_file.name)

        imgId_to_scores = {}
        spice_scores = []
        # Per-category F-scores; 'Cardinality' is the paper's 'Count' column.
        keys = ['Relation', 'Cardinality', 'Color', 'Attribute', 'Object', 'Size']
        other_scores = {key: [] for key in keys}
        for item in results:
            imgId_to_scores[item['image_id']] = item['scores']
            spice_scores.append(self.float_convert(item['scores']['All']['f']))
            for key in keys:
                value = self.float_convert(item['scores'][key]['f'])
                if not math.isnan(value):
                    other_scores[key].append(value)
        for key in keys:
            score = np.mean(np.array(other_scores[key]))
            print(f"SPICE key: {key} = {score}")
        average_score = np.mean(np.array(spice_scores))

        scores = []
        for image_id in imgIds:
            # Convert None to NaN before saving scores over subcategories.
            score_set = {}
            for category, score_tuple in imgId_to_scores[image_id].items():
                score_set[category] = {k: self.float_convert(v) for k, v in score_tuple.items()}
            scores.append(score_set)
        print(f"SPICE Score: avg = {average_score}")
        return average_score, scores

    def method(self):
        return "SPICE"
```
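For reference, `compute_score` expects two dicts mapping image ids to lists of caption strings: exactly one hypothesis per image in `res`, one or more references per image in `gts`. A minimal, hypothetical usage sketch (the ids and captions are made up):

```python
# Hypothetical usage; adjust the import to wherever your modified spice.py lives.
from pycocoevalcap.spice.spice import Spice

gts = {0: ["a brown dog runs on the grass", "a dog running across a field"]}
res = {0: ["a dog is running on the grass"]}

scorer = Spice()
avg, per_image = scorer.compute_score(gts, res)  # also prints per-category means
```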
I can't find my old code for the CLIP score. However, you can easily follow this repo to compute it: https://github.com/jmhessel/clipscore
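In case it helps while you set that repo up: CLIPScore (Hessel et al., 2021) is just `w * max(cos(image_embedding, caption_embedding), 0)` with `w = 2.5`. Below is a minimal sketch of that formula, assuming the Hugging Face `transformers` CLIP model; for reported numbers, use the official repo above, which also handles batching and the reference-augmented variant.

```python
# Minimal CLIPScore sketch (not the official implementation).
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").eval()
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def clip_score(image_path: str, caption: str) -> float:
    """CLIPScore = 2.5 * max(cosine(image, caption), 0)."""
    image = Image.open(image_path).convert("RGB")
    inputs = processor(text=[caption], images=image,
                       return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        out = model(**inputs)
    # L2-normalize the projected embeddings, then take their cosine similarity.
    img = out.image_embeds / out.image_embeds.norm(dim=-1, keepdim=True)
    txt = out.text_embeds / out.text_embeds.norm(dim=-1, keepdim=True)
    cosine = (img * txt).sum(dim=-1).item()
    return 2.5 * max(cosine, 0.0)
```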